/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */

/* Describe, check and repair of MARIA tables */

/*
  About checksum calculation.

  There are two types of checksums: table checksum and row checksum.

  Row checksum is an additional byte at the end of dynamic length
  records. It must be calculated if the table is configured for them.
  Otherwise they must not be used. The variable
  MYISAM_SHARE::calc_checksum determines if row checksums are used.
  MI_INFO::checksum is used as temporary storage during row handling.
  For parallel repair we must ensure that only one thread can use this
  variable. There is no problem on the write side as this is done by one
  thread only. But when checking a record after read this could go
  wrong. But since all threads read through a common read buffer, it is
  sufficient if only one thread checks it.

  Table checksum is an eight byte value in the header of the index file.
  It can be calculated even if row checksums are not used. The variable
  MI_CHECK::glob_crc is calculated over all records.
  MI_SORT_PARAM::calc_checksum determines if this should be done. This
  variable is not part of MI_CHECK because it must be set per thread for
  parallel repair. The global glob_crc must be changed by one thread
  only. And it is sufficient to calculate the checksum once only.
*/

#include "ma_ftdefs.h"
#include "ma_rt_index.h"
#include "ma_blockrec.h"
#include "trnman.h"
#include "ma_key_recover.h"

#include <stdarg.h>
#include <my_getopt.h>
#ifdef HAVE_SYS_VADVISE_H
#include <sys/vadvise.h>
#endif
#ifdef HAVE_SYS_MMAN_H
#include <sys/mman.h>
#endif

unknown's avatar
unknown committed
58
/* Functions defined in this file */
59

unknown's avatar
unknown committed
60
static int check_k_link(HA_CHECK *param, MARIA_HA *info, my_off_t next_link);
61
static int chk_index(HA_CHECK *param, MARIA_HA *info,MARIA_KEYDEF *keyinfo,
unknown's avatar
unknown committed
62
		     my_off_t page, uchar *buff, ha_rows *keys,
63 64 65
		     ha_checksum *key_checksum, uint level);
static uint isam_key_length(MARIA_HA *info,MARIA_KEYDEF *keyinfo);
static ha_checksum calc_checksum(ha_rows count);
66
static int writekeys(MARIA_SORT_PARAM *sort_param);
unknown's avatar
unknown committed
67 68
static int sort_one_index(HA_CHECK *param, MARIA_HA *info,
                          MARIA_KEYDEF *keyinfo,
69
			  my_off_t pagepos, File new_file);
unknown's avatar
unknown committed
70 71
static int sort_key_read(MARIA_SORT_PARAM *sort_param, uchar *key);
static int sort_maria_ft_key_read(MARIA_SORT_PARAM *sort_param, uchar *key);
72
static int sort_get_next_record(MARIA_SORT_PARAM *sort_param);
unknown's avatar
unknown committed
73 74 75
static int sort_key_cmp(MARIA_SORT_PARAM *sort_param, const void *a,
                        const void *b);
static int sort_maria_ft_key_write(MARIA_SORT_PARAM *sort_param,
unknown's avatar
unknown committed
76 77
                                   const uchar *a);
static int sort_key_write(MARIA_SORT_PARAM *sort_param, const uchar *a);
78
static my_off_t get_record_for_key(MARIA_HA *info,MARIA_KEYDEF *keyinfo,
unknown's avatar
unknown committed
79
                                   const uchar *key);
80 81
static int sort_insert_key(MARIA_SORT_PARAM  *sort_param,
                           reg1 SORT_KEY_BLOCKS *key_block,
unknown's avatar
unknown committed
82
			   const uchar *key, my_off_t prev_block);
83 84 85 86
static int sort_delete_record(MARIA_SORT_PARAM *sort_param);
/*static int _ma_flush_pending_blocks(HA_CHECK *param);*/
static SORT_KEY_BLOCKS	*alloc_key_blocks(HA_CHECK *param, uint blocks,
					  uint buffer_length);
unknown's avatar
unknown committed
87
static ha_checksum maria_byte_checksum(const uchar *buf, uint length);
88
static void set_data_file_type(MARIA_SORT_INFO *sort_info, MARIA_SHARE *share);
unknown's avatar
unknown committed
89
static void restore_data_file_type(MARIA_SHARE *share);
90
static void change_data_file_descriptor(MARIA_HA *info, File new_file);
91
static void unuse_data_file_descriptor(MARIA_HA *info);
92
static int _ma_safe_scan_block_record(MARIA_SORT_INFO *sort_info,
93
                                      MARIA_HA *info, uchar *record);
94 95
static void copy_data_file_state(MARIA_STATE_INFO *to,
                                 MARIA_STATE_INFO *from);
unknown's avatar
unknown committed
96 97
static void report_keypage_fault(HA_CHECK *param, MARIA_HA *info,
                                 my_off_t position);
unknown's avatar
unknown committed
98
static my_bool create_new_data_handle(MARIA_SORT_PARAM *param, File new_file);
unknown's avatar
unknown committed
99 100
static my_bool _ma_flush_table_files_before_swap(HA_CHECK *param,
                                                 MARIA_HA *info);
101

102

unknown's avatar
unknown committed
103
void maria_chk_init(HA_CHECK *param)
104
{
unknown's avatar
unknown committed
105
  bzero((uchar*) param,sizeof(*param));
106 107 108 109 110 111 112 113 114 115 116 117 118
  param->opt_follow_links=1;
  param->keys_in_use= ~(ulonglong) 0;
  param->search_after_block=HA_OFFSET_ERROR;
  param->auto_increment_value= 0;
  param->use_buffers=USE_BUFFER_INIT;
  param->read_buffer_length=READ_BUFFER_INIT;
  param->write_buffer_length=READ_BUFFER_INIT;
  param->sort_buffer_length=SORT_BUFFER_INIT;
  param->sort_key_blocks=BUFFERS_WHEN_SORTING;
  param->tmpfile_createflag=O_RDWR | O_TRUNC | O_EXCL;
  param->myf_rw=MYF(MY_NABP | MY_WME | MY_WAIT_IF_FULL);
  param->start_check_pos=0;
  param->max_record_length= LONGLONG_MAX;
unknown's avatar
unknown committed
119
  param->pagecache_block_size= KEY_CACHE_BLOCK_SIZE;
120 121 122 123 124 125 126
  param->stats_method= MI_STATS_METHOD_NULLS_NOT_EQUAL;
}

	/* Check the status flags for the table */

/*
  Report suspicious status flags of a table.

  SYNOPSIS
    maria_chk_status()
      param   INOUT  Check parameters; warnings are reported through it
      info    IN     Open table handler

  DESCRIPTION
    Prints a warning if the table is marked as crashed (with a separate
    message when the last repair failed), and another warning if the stored
    open count differs from the expected one (1 if share->global_changed is
    set, otherwise 0).

  RETURN
    0  Always; problems are only reported as warnings
*/

int maria_chk_status(HA_CHECK *param, register MARIA_HA *info)
{
  MARIA_SHARE *share= info->s;

  if (maria_is_crashed_on_repair(info))
    _ma_check_print_warning(param,
                            "Table is marked as crashed and last repair failed");
  else if (maria_is_crashed(info))
    _ma_check_print_warning(param,
                            "Table is marked as crashed");
  if (share->state.open_count != (uint) (share->global_changed ? 1 : 0))
  {
    /* Don't count this as a real warning, as check can correct this ! */
    uint save=param->warning_printed;
    _ma_check_print_warning(param,
                            share->state.open_count==1 ?
                            "%d client is using or hasn't closed the table properly" :
                            "%d clients are using or haven't closed the table properly",
                            share->state.open_count);
    /* If this will be fixed by the check, forget the warning */
    if (param->testflag & T_UPDATE_STATE)
      param->warning_printed=save;
  }
  return 0;
}

unknown's avatar
unknown committed
151 152 153
/*
  Check delete links in row data
*/
154

155 156
int maria_chk_del(HA_CHECK *param, register MARIA_HA *info,
                  ulonglong test_flag)
157
{
158
  MARIA_SHARE *share= info->s;
159 160 161 162 163 164 165
  reg2 ha_rows i;
  uint delete_link_length;
  my_off_t empty,next_link,old_link;
  char buff[22],buff2[22];
  DBUG_ENTER("maria_chk_del");

  LINT_INIT(old_link);
unknown's avatar
unknown committed
166

167 168
  param->record_checksum=0;

169
  if (share->data_file_type == BLOCK_RECORD)
unknown's avatar
unknown committed
170 171
    DBUG_RETURN(0);                             /* No delete links here */

172 173
  delete_link_length=((share->options & HA_OPTION_PACK_RECORD) ? 20 :
		      share->rec_reflength+1);
174 175 176 177

  if (!(test_flag & T_SILENT))
    puts("- check record delete-chain");

178
  next_link=share->state.dellink;
179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198
  if (info->state->del == 0)
  {
    if (test_flag & T_VERBOSE)
    {
      puts("No recordlinks");
    }
  }
  else
  {
    if (test_flag & T_VERBOSE)
      printf("Recordlinks:    ");
    empty=0;
    for (i= info->state->del ; i > 0L && next_link != HA_OFFSET_ERROR ; i--)
    {
      if (*_ma_killed_ptr(param))
        DBUG_RETURN(1);
      if (test_flag & T_VERBOSE)
	printf(" %9s",llstr(next_link,buff));
      if (next_link >= info->state->data_file_length)
	goto wrong;
199
      if (my_pread(info->dfile.file, (uchar*) buff, delete_link_length,
200 201 202 203 204 205 206 207 208 209 210 211 212 213
		   next_link,MYF(MY_NABP)))
      {
	if (test_flag & T_VERBOSE) puts("");
	_ma_check_print_error(param,"Can't read delete-link at filepos: %s",
		    llstr(next_link,buff));
	DBUG_RETURN(1);
      }
      if (*buff != '\0')
      {
	if (test_flag & T_VERBOSE) puts("");
	_ma_check_print_error(param,"Record at pos: %s is not remove-marked",
		    llstr(next_link,buff));
	goto wrong;
      }
214
      if (share->options & HA_OPTION_PACK_RECORD)
215 216 217 218 219 220 221 222 223 224 225 226 227 228 229
      {
	my_off_t prev_link=mi_sizekorr(buff+12);
	if (empty && prev_link != old_link)
	{
	  if (test_flag & T_VERBOSE) puts("");
	  _ma_check_print_error(param,"Deleted block at %s doesn't point back at previous delete link",llstr(next_link,buff2));
	  goto wrong;
	}
	old_link=next_link;
	next_link=mi_sizekorr(buff+4);
	empty+=mi_uint3korr(buff+1);
      }
      else
      {
	param->record_checksum+=(ha_checksum) next_link;
230
	next_link= _ma_rec_pos(info, buff+1);
231
	empty+=share->base.pack_reclength;
232 233
      }
    }
234
    if (info->state->del && (test_flag & T_VERBOSE))
235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262
      puts("\n");
    if (empty != info->state->empty)
    {
      _ma_check_print_warning(param,
			     "Found %s deleted space in delete link chain. Should be %s",
			     llstr(empty,buff2),
			     llstr(info->state->empty,buff));
    }
    if (next_link != HA_OFFSET_ERROR)
    {
      _ma_check_print_error(param,
			   "Found more than the expected %s deleted rows in delete link chain",
			   llstr(info->state->del, buff));
      goto wrong;
    }
    if (i != 0)
    {
      _ma_check_print_error(param,
			   "Found %s deleted rows in delete link chain. Should be %s",
			   llstr(info->state->del - i, buff2),
			   llstr(info->state->del, buff));
      goto wrong;
    }
  }
  DBUG_RETURN(0);

wrong:
  param->testflag|=T_RETRY_WITHOUT_QUICK;
263 264
  if (test_flag & T_VERBOSE)
    puts("");
265 266 267 268 269
  _ma_check_print_error(param,"record delete-link-chain corrupted");
  DBUG_RETURN(1);
} /* maria_chk_del */


unknown's avatar
unknown committed
270
/* Check delete links in index file */
271

272
static int check_k_link(HA_CHECK *param, register MARIA_HA *info,
unknown's avatar
unknown committed
273
                        my_off_t next_link)
274
{
275 276
  MARIA_SHARE *share= info->s;
  uint block_size= share->block_size;
277
  ha_rows records;
278 279
  char llbuff[21], llbuff2[21];
  uchar *buff;
280 281
  DBUG_ENTER("check_k_link");

282 283 284
  if (next_link == HA_OFFSET_ERROR)
    DBUG_RETURN(0);                             /* Avoid printing empty line */

285 286 287 288 289 290 291
  records= (ha_rows) (info->state->key_file_length / block_size);
  while (next_link != HA_OFFSET_ERROR && records > 0)
  {
    if (*_ma_killed_ptr(param))
      DBUG_RETURN(1);
    if (param->testflag & T_VERBOSE)
      printf("%16s",llstr(next_link,llbuff));
292 293 294 295 296 297 298 299 300 301 302 303

    /* Key blocks must lay within the key file length entirely. */
    if (next_link + block_size > info->state->key_file_length)
    {
      /* purecov: begin tested */
      _ma_check_print_error(param, "Invalid key block position: %s  "
                            "key block size: %u  file_length: %s",
                            llstr(next_link, llbuff), block_size,
                            llstr(info->state->key_file_length, llbuff2));
      DBUG_RETURN(1);
      /* purecov: end */
    }
304

unknown's avatar
unknown committed
305 306
    /* Key blocks must be aligned at block_size */
    if (next_link & (block_size -1))
307 308 309 310
    {
      /* purecov: begin tested */
      _ma_check_print_error(param, "Mis-aligned key block: %s  "
                            "minimum key block length: %u",
unknown's avatar
unknown committed
311 312
                            llstr(next_link, llbuff),
                            block_size);
313
      DBUG_RETURN(1);
314 315
      /* purecov: end */
    }
unknown's avatar
unknown committed
316

317 318
    DBUG_ASSERT(share->pagecache->block_size == block_size);
    if (!(buff= pagecache_read(share->pagecache,
319 320
                               &share->kfile,
                               (pgcache_page_no_t) (next_link / block_size),
unknown's avatar
unknown committed
321
                               DFLT_INIT_HITS,
unknown's avatar
unknown committed
322
                               (uchar*) info->buff,
323
                               PAGECACHE_READ_UNKNOWN_PAGE,
unknown's avatar
unknown committed
324
                               PAGECACHE_LOCK_LEFT_UNLOCKED, 0)))
325 326 327 328
    {
      /* purecov: begin tested */
      _ma_check_print_error(param, "key cache read error for block: %s",
                            llstr(next_link,llbuff));
329
      DBUG_RETURN(1);
330 331
      /* purecov: end */
    }
332
    if (_ma_get_keynr(info->s, buff) != MARIA_DELETE_KEY_NR)
333 334 335
      _ma_check_print_error(param, "Page at %s is not delete marked",
                            llstr(next_link, llbuff));

336
    next_link= mi_sizekorr(buff + share->keypage_header);
337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354
    records--;
    param->key_file_blocks+=block_size;
  }
  if (param->testflag & T_VERBOSE)
  {
    if (next_link != HA_OFFSET_ERROR)
      printf("%16s\n",llstr(next_link,llbuff));
    else
      puts("");
  }
  DBUG_RETURN (next_link != HA_OFFSET_ERROR);
} /* check_k_link */


	/* Check sizes of files */

/*
  Compare the physical sizes of the index and data files with the sizes
  recorded in the table state.

  SYNOPSIS
    maria_chk_size()
      param   INOUT  Check parameters; T_RETRY_WITHOUT_QUICK is added to
                     testflag if the data file is shorter than recorded
      info    INOUT  Open table handler; info->state->data_file_length is
                     replaced by the physical size when the two differ

  DESCRIPTION
    Data and index are flushed first so that the physical sizes are up to
    date. A file that is shorter than recorded is an error; other size
    differences are warnings. Warnings are also given when a file has grown
    beyond 90% of its limit (not for HA_OPTION_COMPRESS_RECORD tables).

  RETURN
    0      Sizes are acceptable
    != 0   Flush failed or a file is shorter than the state says
*/

int maria_chk_size(HA_CHECK *param, register MARIA_HA *info)
{
  MARIA_SHARE *share= info->s;
  int error;
  register my_off_t skr,size;
  char buff[22],buff2[22];
  DBUG_ENTER("maria_chk_size");

  if (!(param->testflag & T_SILENT))
    puts("- check file-size");

  /*
    The following is needed if called externally (not from maria_chk).
    To get a correct physical size we need to flush them.
  */
  if ((error= _ma_flush_table_files(info,
                                    MARIA_FLUSH_DATA | MARIA_FLUSH_INDEX,
                                    FLUSH_FORCE_WRITE, FLUSH_FORCE_WRITE)))
    _ma_check_print_error(param, "Failed to flush data or index file");

  size= my_seek(share->kfile.file, 0L, MY_SEEK_END, MYF(MY_THREADSAFE));
  if ((skr=(my_off_t) info->state->key_file_length) != size)
  {
    /* Don't give error if file generated by mariapack */
    if (skr > size && maria_is_any_key_active(share->state.key_map))
    {
      error=1;
      _ma_check_print_error(param,
                            "Size of indexfile is: %-8s        Should be: %s",
                            llstr(size,buff), llstr(skr,buff2));
    }
    else if (!(param->testflag & T_VERY_SILENT))
      _ma_check_print_warning(param,
                              "Size of indexfile is: %-8s      Should be: %s",
                              llstr(size,buff), llstr(skr,buff2));
  }
  if (!(param->testflag & T_VERY_SILENT) &&
      ! (share->options & HA_OPTION_COMPRESS_RECORD) &&
      ulonglong2double(info->state->key_file_length) >
      ulonglong2double(share->base.margin_key_file_length)*0.9)
    /*
      Each llstr() needs its own buffer: both results are live until the
      message has been formatted.
    */
    _ma_check_print_warning(param,"Keyfile is almost full, %10s of %10s used",
                            llstr(info->state->key_file_length,buff),
                            llstr(share->base.max_key_file_length-1,buff2));

  size= my_seek(info->dfile.file, 0L, MY_SEEK_END, MYF(0));
  skr=(my_off_t) info->state->data_file_length;
  if (share->options & HA_OPTION_COMPRESS_RECORD)
    skr+= MEMMAP_EXTRA_MARGIN;
#ifdef USE_RELOC
  if (info->data_file_type == STATIC_RECORD &&
      skr < (my_off_t) share->base.reloc*share->base.min_pack_length)
    skr=(my_off_t) share->base.reloc*share->base.min_pack_length;
#endif
  if (skr != size)
  {
    info->state->data_file_length=size;	/* Skip other errors */
    if (skr > size && skr != size + MEMMAP_EXTRA_MARGIN)
    {
      error=1;
      _ma_check_print_error(param,"Size of datafile is: %-9s         Should be: %s",
                            llstr(size,buff), llstr(skr,buff2));
      param->testflag|=T_RETRY_WITHOUT_QUICK;
    }
    else
    {
      _ma_check_print_warning(param,
                              "Size of datafile is: %-9s       Should be: %s",
                              llstr(size,buff), llstr(skr,buff2));
    }
  }
  if (!(param->testflag & T_VERY_SILENT) &&
      !(share->options & HA_OPTION_COMPRESS_RECORD) &&
      ulonglong2double(info->state->data_file_length) >
      (ulonglong2double(share->base.max_data_file_length)*0.9))
    _ma_check_print_warning(param, "Datafile is almost full, %10s of %10s used",
                            llstr(info->state->data_file_length,buff),
                            llstr(share->base.max_data_file_length-1,buff2));
  DBUG_RETURN(error);
} /* maria_chk_size */


unknown's avatar
unknown committed
434
/* Check keys */
435 436 437 438 439 440 441

int maria_chk_key(HA_CHECK *param, register MARIA_HA *info)
{
  uint key,found_keys=0,full_text_keys=0,result=0;
  ha_rows keys;
  ha_checksum old_record_checksum,init_checksum;
  my_off_t all_keydata,all_totaldata,key_totlength,length;
442
  double  *rec_per_key_part;
443
  MARIA_SHARE *share= info->s;
444 445 446 447 448 449 450
  MARIA_KEYDEF *keyinfo;
  char buff[22],buff2[22];
  DBUG_ENTER("maria_chk_key");

  if (!(param->testflag & T_SILENT))
    puts("- check key delete-chain");

451 452
  param->key_file_blocks=share->base.keystart;
  if (check_k_link(param, info, share->state.key_del))
unknown's avatar
unknown committed
453 454 455 456 457
  {
    if (param->testflag & T_VERBOSE) puts("");
    _ma_check_print_error(param,"key delete-link-chain corrupted");
    DBUG_RETURN(-1);
  }
458

459 460
  if (!(param->testflag & T_SILENT))
    puts("- check index reference");
461 462 463

  all_keydata=all_totaldata=key_totlength=0;
  init_checksum=param->record_checksum;
464
  old_record_checksum=0;
unknown's avatar
unknown committed
465 466 467 468
  if (share->data_file_type == STATIC_RECORD)
    old_record_checksum= (calc_checksum(info->state->records +
                                        info->state->del-1) *
                          share->base.pack_reclength);
469
  rec_per_key_part= param->new_rec_per_key_part;
470 471 472 473 474 475 476 477 478
  for (key= 0,keyinfo= &share->keyinfo[0]; key < share->base.keys ;
       rec_per_key_part+=keyinfo->keysegs, key++, keyinfo++)
  {
    param->key_crc[key]=0;
    if (! maria_is_key_active(share->state.key_map, key))
    {
      /* Remember old statistics for key */
      memcpy((char*) rec_per_key_part,
	     (char*) (share->state.rec_per_key_part +
479
		      (uint) (rec_per_key_part - param->new_rec_per_key_part)),
480 481 482 483 484 485 486 487 488 489 490 491 492 493
	     keyinfo->keysegs*sizeof(*rec_per_key_part));
      continue;
    }
    found_keys++;

    param->record_checksum=init_checksum;

    bzero((char*) &param->unique_count,sizeof(param->unique_count));
    bzero((char*) &param->notnull_count,sizeof(param->notnull_count));

    if ((!(param->testflag & T_SILENT)))
      printf ("- check data record references index: %d\n",key+1);
    if (keyinfo->flag & HA_FULLTEXT)
      full_text_keys++;
494 495 496 497
    if (share->state.key_root[key] == HA_OFFSET_ERROR)
    {
      if (info->state->records != 0 && !(keyinfo->flag & HA_FULLTEXT))
        _ma_check_print_error(param, "Key tree %u is empty", key + 1);
498
      goto do_stat;
499 500 501 502
    }
    if (!_ma_fetch_keypage(info, keyinfo, share->state.key_root[key],
                           PAGECACHE_LOCK_LEFT_UNLOCKED, DFLT_INIT_HITS,
                           info->buff, 0, 0))
503
    {
unknown's avatar
unknown committed
504
      report_keypage_fault(param, info, share->state.key_root[key]);
505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528
      if (!(param->testflag & T_INFO))
	DBUG_RETURN(-1);
      result= -1;
      continue;
    }
    param->key_file_blocks+=keyinfo->block_length;
    keys=0;
    param->keydata=param->totaldata=0;
    param->key_blocks=0;
    param->max_level=0;
    if (chk_index(param,info,keyinfo,share->state.key_root[key],info->buff,
		  &keys, param->key_crc+key,1))
      DBUG_RETURN(-1);
    if(!(keyinfo->flag & (HA_FULLTEXT | HA_SPATIAL)))
    {
      if (keys != info->state->records)
      {
	_ma_check_print_error(param,"Found %s keys of %s",llstr(keys,buff),
		    llstr(info->state->records,buff2));
	if (!(param->testflag & T_INFO))
	DBUG_RETURN(-1);
	result= -1;
	continue;
      }
unknown's avatar
unknown committed
529 530 531 532
      if ((found_keys - full_text_keys == 1 &&
           !(share->data_file_type == STATIC_RECORD)) ||
          (param->testflag & T_DONT_CHECK_CHECKSUM))
	old_record_checksum= param->record_checksum;
533 534 535
      else if (old_record_checksum != param->record_checksum)
      {
	if (key)
536 537 538
	  _ma_check_print_error(param,
                                "Key %u doesn't point at same records as "
                                "key 1",
539 540 541 542 543 544 545 546 547 548 549 550
		      key+1);
	else
	  _ma_check_print_error(param,"Key 1 doesn't point at all records");
	if (!(param->testflag & T_INFO))
	  DBUG_RETURN(-1);
	result= -1;
	continue;
      }
    }
    if ((uint) share->base.auto_key -1 == key)
    {
      /* Check that auto_increment key is bigger than max key value */
551
      ulonglong auto_increment;
552
      const HA_KEYSEG *keyseg= share->keyinfo[share->base.auto_key-1].seg;
553
      info->lastinx=key;
unknown's avatar
unknown committed
554
      _ma_read_key_record(info, info->rec_buff, 0);
555 556 557
      auto_increment=
        ma_retrieve_auto_increment(info->rec_buff + keyseg->start,
                                   keyseg->type);
558
      if (auto_increment > share->state.auto_increment)
559
      {
560 561
	_ma_check_print_warning(param, "Auto-increment value: %s is smaller "
                                "than max used value: %s",
562
                                llstr(share->state.auto_increment,buff2),
563
                                llstr(auto_increment, buff));
564 565 566
      }
      if (param->testflag & T_AUTO_INC)
      {
567
        set_if_bigger(share->state.auto_increment,
568
                      auto_increment);
569
        set_if_bigger(share->state.auto_increment,
570
                      param->auto_increment_value);
571 572 573 574 575
      }

      /* Check that there isn't a row with auto_increment = 0 in the table */
      maria_extra(info,HA_EXTRA_KEYREAD,0);
      bzero(info->lastkey,keyinfo->seg->length);
unknown's avatar
unknown committed
576
      if (!maria_rkey(info, info->rec_buff, key, (const uchar*) info->lastkey,
577
                      (key_part_map)1, HA_READ_KEY_EXACT))
578
      {
unknown's avatar
unknown committed
579
	/* Don't count this as a real warning, as maria_chk can't correct it */
580
	uint save=param->warning_printed;
581 582
	_ma_check_print_warning(param, "Found row where the auto_increment "
                                "column has the value 0");
583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617
	param->warning_printed=save;
      }
      maria_extra(info,HA_EXTRA_NO_KEYREAD,0);
    }

    length=(my_off_t) isam_key_length(info,keyinfo)*keys + param->key_blocks*2;
    if (param->testflag & T_INFO && param->totaldata != 0L && keys != 0L)
      printf("Key: %2d:  Keyblocks used: %3d%%  Packed: %4d%%  Max levels: %2d\n",
	     key+1,
	     (int) (my_off_t2double(param->keydata)*100.0/my_off_t2double(param->totaldata)),
	     (int) ((my_off_t2double(length) - my_off_t2double(param->keydata))*100.0/
		    my_off_t2double(length)),
	     param->max_level);
    all_keydata+=param->keydata; all_totaldata+=param->totaldata; key_totlength+=length;

do_stat:
    if (param->testflag & T_STATISTICS)
      maria_update_key_parts(keyinfo, rec_per_key_part, param->unique_count,
                       param->stats_method == MI_STATS_METHOD_IGNORE_NULLS?
                       param->notnull_count: NULL,
                       (ulonglong)info->state->records);
  }
  if (param->testflag & T_INFO)
  {
    if (all_totaldata != 0L && found_keys > 0)
      printf("Total:    Keyblocks used: %3d%%  Packed: %4d%%\n\n",
	     (int) (my_off_t2double(all_keydata)*100.0/
		    my_off_t2double(all_totaldata)),
	     (int) ((my_off_t2double(key_totlength) -
		     my_off_t2double(all_keydata))*100.0/
		     my_off_t2double(key_totlength)));
    else if (all_totaldata != 0L && maria_is_any_key_active(share->state.key_map))
      puts("");
  }
  if (param->key_file_blocks != info->state->key_file_length &&
618
      share->state.key_map == ~(ulonglong) 0)
619 620 621 622 623 624 625 626 627
    _ma_check_print_warning(param, "Some data are unreferenced in keyfile");
  if (found_keys != full_text_keys)
    param->record_checksum=old_record_checksum-init_checksum;	/* Remove delete links */
  else
    param->record_checksum=0;
  DBUG_RETURN(result);
} /* maria_chk_key */


628

unknown's avatar
unknown committed
629 630
static int chk_index_down(HA_CHECK *param, MARIA_HA *info,
                          MARIA_KEYDEF *keyinfo,
unknown's avatar
unknown committed
631
                          my_off_t page, uchar *buff, ha_rows *keys,
unknown's avatar
unknown committed
632
                          ha_checksum *key_checksum, uint level)
633 634
{
  char llbuff[22],llbuff2[22];
635
  DBUG_ENTER("chk_index_down");
636

637 638 639 640 641
  /* Key blocks must lay within the key file length entirely. */
  if (page + keyinfo->block_length > info->state->key_file_length)
  {
    /* purecov: begin tested */
    /* Give it a chance to fit in the real file size. */
642 643
    my_off_t max_length= my_seek(info->s->kfile.file, 0L, MY_SEEK_END,
                                 MYF(MY_THREADSAFE));
644 645 646 647 648
    _ma_check_print_error(param, "Invalid key block position: %s  "
                          "key block size: %u  file_length: %s",
                          llstr(page, llbuff), keyinfo->block_length,
                          llstr(info->state->key_file_length, llbuff2));
    if (page + keyinfo->block_length > max_length)
649
      goto err;
unknown's avatar
unknown committed
650
    /* Fix the remembered key file length. */
651 652 653
    info->state->key_file_length= (max_length &
                                   ~ (my_off_t) (keyinfo->block_length - 1));
    /* purecov: end */
654
  }
655

unknown's avatar
unknown committed
656 657
  /* Key blocks must be aligned at block length */
  if (page & (info->s->block_size -1))
658 659 660
  {
    /* purecov: begin tested */
    _ma_check_print_error(param, "Mis-aligned key block: %s  "
661
                          "key block length: %u",
unknown's avatar
unknown committed
662
                          llstr(page, llbuff), info->s->block_size);
663 664 665 666
    goto err;
    /* purecov: end */
  }

667 668
  if (!_ma_fetch_keypage(info, keyinfo, page, PAGECACHE_LOCK_LEFT_UNLOCKED,
                         DFLT_INIT_HITS, buff, 0, 0))
669
  {
unknown's avatar
unknown committed
670
    report_keypage_fault(param, info, page);
671 672 673 674 675 676
    goto err;
  }
  param->key_file_blocks+=keyinfo->block_length;
  if (chk_index(param,info,keyinfo,page,buff,keys,key_checksum,level))
    goto err;

677 678 679
  DBUG_RETURN(0);

  /* purecov: begin tested */
680
err:
681 682
  DBUG_RETURN(1);
  /* purecov: end */
683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702
}


/*
  "Ignore NULLs" statistics collection method: process first index tuple.

  SYNOPSIS
    maria_collect_stats_nonulls_first()
      keyseg   IN     Array of key part descriptions
      notnull  INOUT  Array, notnull[i] = (number of {keypart1...keypart_i}
                                           tuples that don't contain NULLs)
      key      IN     Key values tuple

  DESCRIPTION
    Process the first index tuple - find out which prefix tuples don't
    contain NULLs, and update the array of notnull counters accordingly.
*/

static
void maria_collect_stats_nonulls_first(HA_KEYSEG *keyseg, ulonglong *notnull,
unknown's avatar
unknown committed
703
                                       const uchar *key)
704 705
{
  uint first_null, kp;
unknown's avatar
unknown committed
706
  first_null= ha_find_null(keyseg, (uchar*) key) - keyseg;
707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742
  /*
    All prefix tuples that don't include keypart_{first_null} are not-null
    tuples (and all others aren't), increment counters for them.
  */
  for (kp= 0; kp < first_null; kp++)
    notnull[kp]++;
}


/*
  "Ignore NULLs" statistics collection method: process next index tuple.

  SYNOPSIS
    maria_collect_stats_nonulls_next()
      keyseg   IN     Array of key part descriptions
      notnull  INOUT  Array, notnull[i] = (number of {keypart1...keypart_i}
                                           tuples that don't contain NULLs)
      prev_key IN     Previous key values tuple
      last_key IN     Next key values tuple

  DESCRIPTION
    Process the next index tuple:
    1. Find out which prefix tuples of last_key don't contain NULLs, and
       update the array of notnull counters accordingly.
    2. Find the first keypart number where the prev_key and last_key tuples
       are different(A), or last_key has NULL value(B), and return it, so the
       caller can count number of unique tuples for each key prefix. We don't
       need (B) to be counted, and that is compensated back in
       maria_update_key_parts().

  RETURN
    1 + number of first keypart where values differ or last_key tuple has NULL
*/

static
int maria_collect_stats_nonulls_next(HA_KEYSEG *keyseg, ulonglong *notnull,
                                     const uchar *prev_key,
                                     const uchar *last_key)
{
  uint diffs[2];
  uint first_null_seg, kp;
  HA_KEYSEG *seg;

  /*
     Find the first keypart where values are different or either of them is
     NULL. We get results in diffs array:
     diffs[0]= 1 + number of first different keypart
     diffs[1]=offset: (last_key + diffs[1]) points to first value in
                      last_key that is NULL or different from corresponding
                      value in prev_key.
  */
  ha_key_cmp(keyseg, (uchar*) prev_key, (uchar*) last_key, USE_WHOLE_KEY,
             SEARCH_FIND | SEARCH_NULL_ARE_NOT_EQUAL, diffs);
  seg= keyseg + diffs[0] - 1;

  /* Find first NULL in last_key */
  first_null_seg= ha_find_null(seg, (uchar*) last_key + diffs[1]) - keyseg;
  for (kp= 0; kp < first_null_seg; kp++)
    notnull[kp]++;

  /*
    Return 1+ number of first key part where values differ. Don't care if
    these were NULLs and not .... We compensate for that in
    maria_update_key_parts.
  */
  return diffs[0];
}


776
/* Check if index is ok */
777 778

static int chk_index(HA_CHECK *param, MARIA_HA *info, MARIA_KEYDEF *keyinfo,
unknown's avatar
unknown committed
779
		     my_off_t page, uchar *buff, ha_rows *keys,
780 781 782 783
		     ha_checksum *key_checksum, uint level)
{
  int flag;
  uint used_length,comp_flag,nod_flag,key_length=0;
unknown's avatar
unknown committed
784
  uchar key[HA_MAX_POSSIBLE_KEY_BUFF],*temp_buff,*keypos,*old_keypos,*endpos;
785
  my_off_t next_page,record;
786
  MARIA_SHARE *share= info->s;
787 788 789
  char llbuff[22];
  uint diff_pos[2];
  DBUG_ENTER("chk_index");
790
  DBUG_DUMP("buff", buff, _ma_get_page_used(share, buff));
791 792 793 794 795

  /* TODO: implement appropriate check for RTree keys */
  if (keyinfo->flag & HA_SPATIAL)
    DBUG_RETURN(0);

unknown's avatar
unknown committed
796
  if (!(temp_buff=(uchar*) my_alloca((uint) keyinfo->block_length)))
797 798 799 800 801 802 803 804 805 806
  {
    _ma_check_print_error(param,"Not enough memory for keyblock");
    DBUG_RETURN(-1);
  }

  if (keyinfo->flag & HA_NOSAME)
    comp_flag=SEARCH_FIND | SEARCH_UPDATE;	/* Not real duplicates */
  else
    comp_flag=SEARCH_SAME;			/* Keys in positionorder */

807 808
  _ma_get_used_and_nod(share, buff, used_length, nod_flag);
  keypos= buff + share->keypage_header + nod_flag;
809 810 811 812
  endpos= buff + used_length;

  param->keydata+=   used_length;
  param->totaldata+= keyinfo->block_length;	/* INFO */
813 814 815 816
  param->key_blocks++;
  if (level > param->max_level)
    param->max_level=level;

817
  if (_ma_get_keynr(share, buff) != (uint) (keyinfo - share->keyinfo))
818 819
    _ma_check_print_error(param, "Page at %s is not marked for index %u",
                          llstr(page, llbuff),
820
                          (uint) (keyinfo - share->keyinfo));
821

822 823 824 825 826 827 828 829 830 831
  if (used_length > keyinfo->block_length)
  {
    _ma_check_print_error(param,"Wrong pageinfo at page: %s",
			 llstr(page,llbuff));
    goto err;
  }
  for ( ;; )
  {
    if (*_ma_killed_ptr(param))
      goto err;
unknown's avatar
unknown committed
832 833
    memcpy(info->lastkey, key, key_length);
    info->lastkey_length= key_length;
834 835 836 837 838 839 840 841 842 843 844 845 846
    if (nod_flag)
    {
      next_page= _ma_kpos(nod_flag,keypos);
      if (chk_index_down(param,info,keyinfo,next_page,
                         temp_buff,keys,key_checksum,level+1))
	goto err;
    }
    old_keypos=keypos;
    if (keypos >= endpos ||
	(key_length=(*keyinfo->get_key)(keyinfo,nod_flag,&keypos,key)) == 0)
      break;
    if (keypos > endpos)
    {
unknown's avatar
unknown committed
847 848
      _ma_check_print_error(param,"Wrong key block length at page: %s",
                            llstr(page,llbuff));
849 850 851
      goto err;
    }
    if ((*keys)++ &&
unknown's avatar
unknown committed
852 853
	(flag=ha_key_cmp(keyinfo->seg, (uchar*) info->lastkey, (uchar*) key,
                         key_length, comp_flag, diff_pos)) >=0)
854
    {
unknown's avatar
unknown committed
855 856 857
      DBUG_DUMP("old", info->lastkey, info->lastkey_length);
      DBUG_DUMP("new", key, key_length);
      DBUG_DUMP("new_in_page", old_keypos, (uint) (keypos-old_keypos));
858 859

      if (comp_flag & SEARCH_FIND && flag == 0)
unknown's avatar
unknown committed
860 861
	_ma_check_print_error(param,"Found duplicated key at page %s",
                              llstr(page,llbuff));
862
      else
unknown's avatar
unknown committed
863 864
	_ma_check_print_error(param,"Key in wrong position at page %s",
                              llstr(page,llbuff));
865 866 867 868 869 870 871
      goto err;
    }
    if (param->testflag & T_STATISTICS)
    {
      if (*keys != 1L)				/* not first_key */
      {
        if (param->stats_method == MI_STATS_METHOD_NULLS_NOT_EQUAL)
unknown's avatar
unknown committed
872 873
          ha_key_cmp(keyinfo->seg, (uchar*) info->lastkey, (uchar*) key,
                     USE_WHOLE_KEY, SEARCH_FIND | SEARCH_NULL_ARE_NOT_EQUAL,
874 875 876 877
                     diff_pos);
        else if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS)
        {
          diff_pos[0]= maria_collect_stats_nonulls_next(keyinfo->seg,
unknown's avatar
unknown committed
878 879
                                                        param->notnull_count,
                                                        info->lastkey, key);
880 881 882 883 884 885 886 887 888 889
        }
	param->unique_count[diff_pos[0]-1]++;
      }
      else
      {
        if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS)
          maria_collect_stats_nonulls_first(keyinfo->seg, param->notnull_count,
                                         key);
      }
    }
unknown's avatar
unknown committed
890
    (*key_checksum)+= maria_byte_checksum((uchar*) key,
891
                                          key_length- share->rec_reflength);
892 893 894 895 896 897 898 899 900 901
    record= _ma_dpos(info,0,key+key_length);
    if (keyinfo->flag & HA_FULLTEXT) /* special handling for ft2 */
    {
      uint off;
      int  subkeys;
      get_key_full_length_rdonly(off, key);
      subkeys=ft_sintXkorr(key+off);
      if (subkeys < 0)
      {
        ha_rows tmp_keys=0;
902
        if (chk_index_down(param,info,&share->ft2_keyinfo,record,
903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927
                           temp_buff,&tmp_keys,key_checksum,1))
          goto err;
        if (tmp_keys + subkeys)
        {
          _ma_check_print_error(param,
                               "Number of words in the 2nd level tree "
                               "does not match the number in the header. "
                               "Parent word in on the page %s, offset %u",
                               llstr(page,llbuff), (uint) (old_keypos-buff));
          goto err;
        }
        (*keys)+=tmp_keys-1;
        continue;
      }
      /* fall through */
    }
    if (record >= info->state->data_file_length)
    {
#ifndef DBUG_OFF
      char llbuff2[22], llbuff3[22];
#endif
      _ma_check_print_error(param,"Found key at page %s that points to record outside datafile",llstr(page,llbuff));
      DBUG_PRINT("test",("page: %s  record: %s  filelength: %s",
			 llstr(page,llbuff),llstr(record,llbuff2),
			 llstr(info->state->data_file_length,llbuff3)));
unknown's avatar
unknown committed
928
      DBUG_DUMP("key",(uchar*) key,key_length);
929 930 931
      DBUG_DUMP("new_in_page",(char*) old_keypos,(uint) (keypos-old_keypos));
      goto err;
    }
unknown's avatar
unknown committed
932
    param->record_checksum+= (ha_checksum) record;
933 934 935
  }
  if (keypos != endpos)
  {
936 937 938 939 940
    _ma_check_print_error(param,
                          "Keyblock size at page %s is not correct. "
                          "Block length: %u  key length: %u",
                          llstr(page, llbuff), used_length,
                          (uint) (keypos - buff));
941 942
    goto err;
  }
unknown's avatar
unknown committed
943
  my_afree((uchar*) temp_buff);
944 945
  DBUG_RETURN(0);
 err:
unknown's avatar
unknown committed
946
  my_afree((uchar*) temp_buff);
947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992
  DBUG_RETURN(1);
} /* chk_index */


	/* Calculate a checksum of 1+2+3+4...N = N*(N+1)/2 without overflow */

static ha_checksum calc_checksum(ha_rows count)
{
  ulonglong total, multiplicand, multiplier;
  DBUG_ENTER("calc_checksum");

  total= 0;
  multiplicand= count;
  multiplier= count+1;

  /*
    One of N and N+1 is even; halve that one before multiplying so the
    division by two in N*(N+1)/2 is exact.
  */
  if (multiplicand & 1)
    multiplier>>= 1;
  else
    multiplicand>>= 1;

  /* Shift-and-add multiplication: total= multiplicand * multiplier */
  for ( ; multiplier ; multiplicand<<= 1, multiplier>>= 1)
  {
    if (multiplier & 1)
      total+= multiplicand;
  }
  DBUG_PRINT("exit",("sum: %lx",(ulong) total));
  DBUG_RETURN((ha_checksum) total);
} /* calc_checksum */


	/* Calc length of key in normal isam */

static uint isam_key_length(MARIA_HA *info, register MARIA_KEYDEF *keyinfo)
{
  uint length;
  HA_KEYSEG *keyseg;
  DBUG_ENTER("isam_key_length");

  /* Row reference length plus the length of every key segment */
  length= info->s->rec_reflength;
  /* The segment array is terminated by an entry whose type is 0 */
  for (keyseg=keyinfo->seg ; keyseg->type ; keyseg++)
    length+= keyseg->length;

  DBUG_PRINT("exit",("length: %u",length));
  DBUG_RETURN(length);
} /* isam_key_length */



unknown's avatar
unknown committed
993 994
static void record_pos_to_txt(MARIA_HA *info, my_off_t recpos,
                              char *buff)
995
{
unknown's avatar
unknown committed
996 997 998
  if (info->s->data_file_type != BLOCK_RECORD)
    llstr(recpos, buff);
  else
999
  {
unknown's avatar
unknown committed
1000
    my_off_t page= ma_recordpos_to_page(recpos);
1001
    uint row= ma_recordpos_to_dir_entry(recpos);
unknown's avatar
unknown committed
1002 1003 1004
    char *end= longlong10_to_str(page, buff, 10);
    *(end++)= ':';
    longlong10_to_str(row, end, 10);
1005
  }
unknown's avatar
unknown committed
1006
}
1007

unknown's avatar
unknown committed
1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024

/*
  Check that keys in records exist in index tree

  SYNOPSIS
  check_keys_in_record()
  param		Check paramenter
  info		Maria handler
  extend	Type of check (extended or normal)
  start_recpos	Position to row
  record	Record buffer

  NOTES
    This function also calculates record checksum & number of rows
*/

static int check_keys_in_record(HA_CHECK *param, MARIA_HA *info, int extend,
unknown's avatar
unknown committed
1025
                                my_off_t start_recpos, uchar *record)
unknown's avatar
unknown committed
1026
{
1027
  MARIA_SHARE *share= info->s;
unknown's avatar
unknown committed
1028 1029 1030 1031 1032 1033 1034
  MARIA_KEYDEF *keyinfo;
  char llbuff[22+4];
  uint key;

  param->tmp_record_checksum+= (ha_checksum) start_recpos;
  param->records++;
  if (param->testflag & T_WRITE_LOOP && param->records % WRITE_COUNT == 0)
1035
  {
unknown's avatar
unknown committed
1036 1037
    printf("%s\r", llstr(param->records, llbuff));
    VOID(fflush(stdout));
1038 1039
  }

unknown's avatar
unknown committed
1040
  /* Check if keys match the record */
1041
  for (key=0, keyinfo= share->keyinfo; key < share->base.keys;
unknown's avatar
unknown committed
1042
       key++,keyinfo++)
1043
  {
1044
    if (maria_is_key_active(share->state.key_map, key))
1045
    {
unknown's avatar
unknown committed
1046
      if(!(keyinfo->flag & HA_FULLTEXT))
1047
      {
unknown's avatar
unknown committed
1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061
        uint key_length= _ma_make_key(info,key,info->lastkey,record,
                                      start_recpos);
        if (extend)
        {
          /* We don't need to lock the key tree here as we don't allow
             concurrent threads when running maria_chk
          */
          int search_result=
#ifdef HAVE_RTREE_KEYS
            (keyinfo->flag & HA_SPATIAL) ?
            maria_rtree_find_first(info, key, info->lastkey, key_length,
                                   MBR_EQUAL | MBR_DATA) :
#endif
            _ma_search(info,keyinfo,info->lastkey,key_length,
1062
                       SEARCH_SAME, share->state.key_root[key]);
unknown's avatar
unknown committed
1063 1064 1065
          if (search_result)
          {
            record_pos_to_txt(info, start_recpos, llbuff);
unknown's avatar
unknown committed
1066 1067 1068 1069
            _ma_check_print_error(param,
                                  "Record at: %14s  "
                                  "Can't find key for index: %2d",
                                  llbuff, key+1);
unknown's avatar
unknown committed
1070 1071 1072 1073 1074 1075
            if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE))
              return -1;
          }
        }
        else
          param->tmp_key_crc[key]+=
unknown's avatar
unknown committed
1076
            maria_byte_checksum((uchar*) info->lastkey, key_length);
1077 1078 1079
      }
    }
  }
unknown's avatar
unknown committed
1080 1081 1082
  return 0;
}

1083

unknown's avatar
unknown committed
1084 1085 1086 1087 1088
/*
  Functions to loop through all rows and check if they are ok

  NOTES
    One function for each record format
1089

unknown's avatar
unknown committed
1090 1091 1092 1093 1094 1095 1096
  RESULT
    0  ok
    -1 Interrupted by user
    1  Error
*/

static int check_static_record(HA_CHECK *param, MARIA_HA *info, int extend,
unknown's avatar
unknown committed
1097
                               uchar *record)
unknown's avatar
unknown committed
1098
{
1099
  MARIA_SHARE *share= info->s;
unknown's avatar
unknown committed
1100 1101 1102
  my_off_t start_recpos, pos;
  char llbuff[22];

1103
  pos= 0;
1104 1105 1106
  while (pos < info->state->data_file_length)
  {
    if (*_ma_killed_ptr(param))
unknown's avatar
unknown committed
1107
      return -1;
unknown's avatar
unknown committed
1108
    if (my_b_read(&param->read_cache,(uchar*) record,
1109
                  share->base.pack_reclength))
unknown's avatar
unknown committed
1110 1111 1112 1113 1114 1115 1116
    {
      _ma_check_print_error(param,
                            "got error: %d when reading datafile at position: %s",
                            my_errno, llstr(pos, llbuff));
      return 1;
    }
    start_recpos= pos;
1117
    pos+= share->base.pack_reclength;
unknown's avatar
unknown committed
1118 1119 1120 1121
    param->splits++;
    if (*record == '\0')
    {
      param->del_blocks++;
1122
      param->del_length+= share->base.pack_reclength;
unknown's avatar
unknown committed
1123 1124 1125
      continue;					/* Record removed */
    }
    param->glob_crc+= _ma_static_checksum(info,record);
1126
    param->used+= share->base.pack_reclength;
unknown's avatar
unknown committed
1127 1128 1129 1130 1131 1132 1133 1134
    if (check_keys_in_record(param, info, extend, start_recpos, record))
      return 1;
  }
  return 0;
}


static int check_dynamic_record(HA_CHECK *param, MARIA_HA *info, int extend,
unknown's avatar
unknown committed
1135
                                uchar *record)
unknown's avatar
unknown committed
1136 1137
{
  MARIA_BLOCK_INFO block_info;
1138
  MARIA_SHARE *share= info->s;
unknown's avatar
unknown committed
1139
  my_off_t start_recpos, start_block, pos;
unknown's avatar
unknown committed
1140
  uchar *to;
unknown's avatar
unknown committed
1141 1142 1143 1144 1145
  ulong left_length;
  uint	b_type;
  char llbuff[22],llbuff2[22],llbuff3[22];
  DBUG_ENTER("check_dynamic_record");

unknown's avatar
unknown committed
1146 1147 1148 1149
  LINT_INIT(left_length);
  LINT_INIT(start_recpos);
  LINT_INIT(to);

1150
  pos= 0;
unknown's avatar
unknown committed
1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161
  while (pos < info->state->data_file_length)
  {
    my_bool got_error= 0;
    int flag;
    if (*_ma_killed_ptr(param))
      DBUG_RETURN(-1);

    flag= block_info.second_read=0;
    block_info.next_filepos=pos;
    do
    {
unknown's avatar
unknown committed
1162
      if (_ma_read_cache(&param->read_cache,(uchar*) block_info.header,
unknown's avatar
unknown committed
1163 1164 1165
                         (start_block=block_info.next_filepos),
                         sizeof(block_info.header),
                         (flag ? 0 : READING_NEXT) | READING_HEADER))
1166
      {
unknown's avatar
unknown committed
1167
        _ma_check_print_error(param,
unknown's avatar
unknown committed
1168 1169
                              "got error: %d when reading datafile at "
                              "position: %s",
unknown's avatar
unknown committed
1170 1171
                              my_errno, llstr(start_block, llbuff));
        DBUG_RETURN(1);
1172
      }
unknown's avatar
unknown committed
1173 1174

      if (start_block & (MARIA_DYN_ALIGN_SIZE-1))
1175
      {
unknown's avatar
unknown committed
1176 1177 1178 1179 1180 1181 1182
        _ma_check_print_error(param,"Wrong aligned block at %s",
                              llstr(start_block,llbuff));
        DBUG_RETURN(1);
      }
      b_type= _ma_get_block_info(&block_info,-1,start_block);
      if (b_type & (BLOCK_DELETED | BLOCK_ERROR | BLOCK_SYNC_ERROR |
                    BLOCK_FATAL_ERROR))
1183
      {
unknown's avatar
unknown committed
1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197
        if (b_type & BLOCK_SYNC_ERROR)
        {
          if (flag)
          {
            _ma_check_print_error(param,"Unexpected byte: %d at link: %s",
                                  (int) block_info.header[0],
                                  llstr(start_block,llbuff));
            DBUG_RETURN(1);
          }
          pos=block_info.filepos+block_info.block_len;
          goto next;
        }
        if (b_type & BLOCK_DELETED)
        {
1198
          if (block_info.block_len < share->base.min_block_length)
unknown's avatar
unknown committed
1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224
          {
            _ma_check_print_error(param,
                                  "Deleted block with impossible length %lu at %s",
                                  block_info.block_len,llstr(pos,llbuff));
            DBUG_RETURN(1);
          }
          if ((block_info.next_filepos != HA_OFFSET_ERROR &&
               block_info.next_filepos >= info->state->data_file_length) ||
              (block_info.prev_filepos != HA_OFFSET_ERROR &&
               block_info.prev_filepos >= info->state->data_file_length))
          {
            _ma_check_print_error(param,"Delete link points outside datafile at %s",
                                  llstr(pos,llbuff));
            DBUG_RETURN(1);
          }
          param->del_blocks++;
          param->del_length+= block_info.block_len;
          param->splits++;
          pos= block_info.filepos+block_info.block_len;
          goto next;
        }
        _ma_check_print_error(param,"Wrong bytesec: %d-%d-%d at linkstart: %s",
                              block_info.header[0],block_info.header[1],
                              block_info.header[2],
                              llstr(start_block,llbuff));
        DBUG_RETURN(1);
1225
      }
unknown's avatar
unknown committed
1226 1227
      if (info->state->data_file_length < block_info.filepos+
          block_info.block_len)
1228
      {
unknown's avatar
unknown committed
1229 1230 1231 1232 1233
        _ma_check_print_error(param,
                              "Recordlink that points outside datafile at %s",
                              llstr(pos,llbuff));
        got_error=1;
        break;
1234
      }
unknown's avatar
unknown committed
1235 1236
      param->splits++;
      if (!flag++)				/* First block */
1237
      {
unknown's avatar
unknown committed
1238 1239
        start_recpos=pos;
        pos=block_info.filepos+block_info.block_len;
1240
        if (block_info.rec_len > (uint) share->base.max_pack_length)
unknown's avatar
unknown committed
1241 1242 1243 1244 1245 1246 1247
        {
          _ma_check_print_error(param,"Found too long record (%lu) at %s",
                                (ulong) block_info.rec_len,
                                llstr(start_recpos,llbuff));
          got_error=1;
          break;
        }
1248
        if (share->base.blobs)
unknown's avatar
unknown committed
1249 1250 1251
        {
          if (_ma_alloc_buffer(&info->rec_buff, &info->rec_buff_size,
                               block_info.rec_len +
1252
                               share->base.extra_rec_buff_size))
1253

unknown's avatar
unknown committed
1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273
          {
            _ma_check_print_error(param,
                                  "Not enough memory (%lu) for blob at %s",
                                  (ulong) block_info.rec_len,
                                  llstr(start_recpos,llbuff));
            got_error=1;
            break;
          }
        }
        to= info->rec_buff;
        left_length= block_info.rec_len;
      }
      if (left_length < block_info.data_len)
      {
        _ma_check_print_error(param,"Found too long record (%lu) at %s",
                              (ulong) block_info.data_len,
                              llstr(start_recpos,llbuff));
        got_error=1;
        break;
      }
unknown's avatar
unknown committed
1274
      if (_ma_read_cache(&param->read_cache,(uchar*) to,block_info.filepos,
unknown's avatar
unknown committed
1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318
                         (uint) block_info.data_len,
                         flag == 1 ? READING_NEXT : 0))
      {
        _ma_check_print_error(param,
                              "got error: %d when reading datafile at position: %s", my_errno, llstr(block_info.filepos, llbuff));

        DBUG_RETURN(1);
      }
      to+=block_info.data_len;
      param->link_used+= block_info.filepos-start_block;
      param->used+= block_info.filepos - start_block + block_info.data_len;
      param->empty+= block_info.block_len-block_info.data_len;
      left_length-= block_info.data_len;
      if (left_length)
      {
        if (b_type & BLOCK_LAST)
        {
          _ma_check_print_error(param,
                                "Wrong record length %s of %s at %s",
                                llstr(block_info.rec_len-left_length,llbuff),
                                llstr(block_info.rec_len, llbuff2),
                                llstr(start_recpos,llbuff3));
          got_error=1;
          break;
        }
        if (info->state->data_file_length < block_info.next_filepos)
        {
          _ma_check_print_error(param,
                                "Found next-recordlink that points outside datafile at %s",
                                llstr(block_info.filepos,llbuff));
          got_error=1;
          break;
        }
      }
    } while (left_length);

    if (! got_error)
    {
      if (_ma_rec_unpack(info,record,info->rec_buff,block_info.rec_len) ==
          MY_FILE_ERROR)
      {
        _ma_check_print_error(param,"Found wrong record at %s",
                              llstr(start_recpos,llbuff));
        got_error=1;
1319 1320
      }
      else
unknown's avatar
unknown committed
1321
      {
1322
        ha_checksum checksum= 0;
1323 1324
        if (share->calc_checksum)
          checksum= (*share->calc_checksum)(info, record);
1325

unknown's avatar
unknown committed
1326 1327 1328
        if (param->testflag & (T_EXTEND | T_MEDIUM | T_VERBOSE))
        {
          if (_ma_rec_check(info,record, info->rec_buff,block_info.rec_len,
1329
                            test(share->calc_checksum), checksum))
unknown's avatar
unknown committed
1330 1331 1332 1333 1334 1335
          {
            _ma_check_print_error(param,"Found wrong packed record at %s",
                                  llstr(start_recpos,llbuff));
            got_error= 1;
          }
        }
1336
        param->glob_crc+= checksum;
unknown's avatar
unknown committed
1337
      }
1338

unknown's avatar
unknown committed
1339 1340 1341 1342
      if (! got_error)
      {
        if (check_keys_in_record(param, info, extend, start_recpos, record))
          DBUG_RETURN(1);
1343 1344
      }
      else
unknown's avatar
unknown committed
1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358
      {
        if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE))
          DBUG_RETURN(1);
      }
    }
    else if (!flag)
      pos= block_info.filepos+block_info.block_len;
next:;
  }
  DBUG_RETURN(0);
}


static int check_compressed_record(HA_CHECK *param, MARIA_HA *info, int extend,
unknown's avatar
unknown committed
1359
                                   uchar *record)
unknown's avatar
unknown committed
1360
{
1361 1362
  MARIA_BLOCK_INFO block_info;
  MARIA_SHARE *share= info->s;
unknown's avatar
unknown committed
1363 1364 1365 1366 1367
  my_off_t start_recpos, pos;
  char llbuff[22];
  bool got_error= 0;
  DBUG_ENTER("check_compressed_record");

1368
  pos= share->pack.header_length;             /* Skip header */
unknown's avatar
unknown committed
1369 1370 1371 1372 1373
  while (pos < info->state->data_file_length)
  {
    if (*_ma_killed_ptr(param))
      DBUG_RETURN(-1);

unknown's avatar
unknown committed
1374
    if (_ma_read_cache(&param->read_cache,(uchar*) block_info.header, pos,
1375
                       share->pack.ref_length, READING_NEXT))
unknown's avatar
unknown committed
1376 1377 1378 1379 1380 1381 1382 1383 1384
    {
      _ma_check_print_error(param,
                            "got error: %d when reading datafile at position: %s",
                            my_errno, llstr(pos, llbuff));
      DBUG_RETURN(1);
    }

    start_recpos= pos;
    param->splits++;
unknown's avatar
unknown committed
1385
    VOID(_ma_pack_get_block_info(info, &info->bit_buff, &block_info,
unknown's avatar
unknown committed
1386 1387
                                 &info->rec_buff, &info->rec_buff_size, -1,
                                 start_recpos));
unknown's avatar
unknown committed
1388
    pos=block_info.filepos+block_info.rec_len;
1389 1390
    if (block_info.rec_len < (uint) share->min_pack_length ||
        block_info.rec_len > (uint) share->max_pack_length)
unknown's avatar
unknown committed
1391 1392
    {
      _ma_check_print_error(param,
1393
                            "Found block with wrong recordlength: %lu at %s",
unknown's avatar
unknown committed
1394 1395 1396 1397
                            block_info.rec_len, llstr(start_recpos,llbuff));
      got_error=1;
      goto end;
    }
unknown's avatar
unknown committed
1398
    if (_ma_read_cache(&param->read_cache,(uchar*) info->rec_buff,
unknown's avatar
unknown committed
1399 1400 1401 1402 1403 1404 1405
                       block_info.filepos, block_info.rec_len, READING_NEXT))
    {
      _ma_check_print_error(param,
                            "got error: %d when reading datafile at position: %s",
                            my_errno, llstr(block_info.filepos, llbuff));
      DBUG_RETURN(1);
    }
unknown's avatar
unknown committed
1406 1407
    if (_ma_pack_rec_unpack(info, &info->bit_buff, record,
                            info->rec_buff, block_info.rec_len))
unknown's avatar
unknown committed
1408 1409 1410 1411 1412 1413
    {
      _ma_check_print_error(param,"Found wrong record at %s",
                            llstr(start_recpos,llbuff));
      got_error=1;
      goto end;
    }
1414
    param->glob_crc+= (*share->calc_checksum)(info,record);
unknown's avatar
unknown committed
1415 1416 1417 1418
    param->link_used+= (block_info.filepos - start_recpos);
    param->used+= (pos-start_recpos);

end:
1419 1420
    if (! got_error)
    {
unknown's avatar
unknown committed
1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435
      if (check_keys_in_record(param, info, extend, start_recpos, record))
        DBUG_RETURN(1);
    }
    else
    {
      got_error= 0;                             /* Reset for next loop */
      if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE))
        DBUG_RETURN(1);
    }
  }
  DBUG_RETURN(0);
}


/*
1436
  Check if layout on head or tail page is ok
1437 1438 1439

  NOTES
    This is for rows-in-block format.
unknown's avatar
unknown committed
1440 1441 1442
*/

static int check_page_layout(HA_CHECK *param, MARIA_HA *info,
unknown's avatar
unknown committed
1443
                             my_off_t page_pos, uchar *page,
unknown's avatar
unknown committed
1444 1445 1446
                             uint row_count, uint head_empty,
                             uint *real_rows_found)
{
1447 1448
  uint empty, last_row_end, row, first_dir_entry, free_entry, block_size;
  uint free_entries, prev_free_entry;
unknown's avatar
unknown committed
1449
  uchar *dir_entry;
unknown's avatar
unknown committed
1450
  char llbuff[22];
1451
  my_bool error_in_free_list= 0;
unknown's avatar
unknown committed
1452 1453
  DBUG_ENTER("check_page_layout");

1454
  block_size= info->s->block_size;
unknown's avatar
unknown committed
1455 1456 1457 1458
  empty= 0;
  last_row_end= PAGE_HEADER_SIZE;
  *real_rows_found= 0;

1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499
  /* Check free directory list */
  free_entry= (uint) page[DIR_FREE_OFFSET];
  free_entries= 0;
  prev_free_entry= END_OF_DIR_FREE_LIST;
  while (free_entry != END_OF_DIR_FREE_LIST)
  {
    uchar *dir;
    if (free_entry > row_count)
    {
      _ma_check_print_error(param,
                            "Page %9s:  Directory free entry points outside "
                            "directory",
                            llstr(page_pos, llbuff));
      error_in_free_list= 1;
      break;
    }
    dir= dir_entry_pos(page, block_size, free_entry);
    if (uint2korr(dir) != 0)
    {
      _ma_check_print_error(param,
                            "Page %9s:  Directory free entry points to "
                            "not deleted entry",
                            llstr(page_pos, llbuff));
      error_in_free_list= 1;
      break;
    }
    if (dir[2] != prev_free_entry)
    {
      _ma_check_print_error(param,
                            "Page %9s:  Directory free list back pointer "
                            "points to wrong entry",
                            llstr(page_pos, llbuff));
      error_in_free_list= 1;
      break;
    }
    prev_free_entry= free_entry;
    free_entry= dir[3];
    free_entries++;
  }

  /* Check directry */
1500 1501
  dir_entry= page+ block_size - PAGE_SUFFIX_SIZE;
  first_dir_entry= (block_size - row_count * DIR_ENTRY_SIZE -
1502
                    PAGE_SUFFIX_SIZE);
unknown's avatar
unknown committed
1503 1504 1505 1506 1507 1508 1509
  for (row= 0 ; row < row_count ; row++)
  {
    uint pos, length;
    dir_entry-= DIR_ENTRY_SIZE;
    pos= uint2korr(dir_entry);
    if (!pos)
    {
1510
      free_entries--;
unknown's avatar
unknown committed
1511
      if (row == row_count -1)
1512
      {
unknown's avatar
unknown committed
1513 1514 1515 1516 1517
        _ma_check_print_error(param,
                              "Page %9s:  First entry in directory is 0",
                              llstr(page_pos, llbuff));
        if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE))
          DBUG_RETURN(1);
1518
      }
unknown's avatar
unknown committed
1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547
      continue;                                 /* Deleted row */
    }
    (*real_rows_found)++;
    length= uint2korr(dir_entry+2);
    param->used+= length;
    if (pos < last_row_end)
    {
      _ma_check_print_error(param,
                            "Page %9s:  Row %3u overlapps with previous row",
                            llstr(page_pos, llbuff), row);
      DBUG_RETURN(1);
    }
    empty+= (pos - last_row_end);
    last_row_end= pos + length;
    if (last_row_end > first_dir_entry)
    {
      _ma_check_print_error(param,
                            "Page %9s:  Row %3u overlapps with directory",
                            llstr(page_pos, llbuff), row);
      DBUG_RETURN(1);
    }
  }
  empty+= (first_dir_entry - last_row_end);

  if (empty != head_empty)
  {
    _ma_check_print_error(param,
                          "Page %9s:  Wrong empty size.  Stored: %5u  Actual: %5u",
                          llstr(page_pos, llbuff), head_empty, empty);
1548
    param->err_count++;
unknown's avatar
unknown committed
1549
  }
1550 1551 1552 1553 1554 1555 1556 1557 1558 1559
  if (free_entries != 0 && !error_in_free_list)
  {
    _ma_check_print_error(param,
                          "Page %9s:  Directory free link don't include "
                          "all free entries",
                          llstr(page_pos, llbuff));
    param->err_count++;
  }
  DBUG_RETURN(param->err_count &&
              (param->err_count >= MAXERR || !(param->testflag & T_VERBOSE)));
unknown's avatar
unknown committed
1560 1561 1562 1563 1564 1565 1566
}


/*
  Check all rows on head page

  NOTES
1567 1568
    This is for rows-in-block format.

unknown's avatar
unknown committed
1569 1570 1571 1572 1573 1574 1575
    Before this, we have already called check_page_layout(), so
    we know the block is logically correct (even if the rows may not be that)

  RETURN
   0  ok
   1  error
*/
1576 1577


unknown's avatar
unknown committed
1578 1579
static my_bool check_head_page(HA_CHECK *param, MARIA_HA *info, uchar *record,
                               int extend, my_off_t page_pos, uchar *page_buff,
unknown's avatar
unknown committed
1580 1581
                               uint row_count)
{
1582
  MARIA_SHARE *share= info->s;
unknown's avatar
unknown committed
1583
  uchar *dir_entry;
unknown's avatar
unknown committed
1584 1585
  uint row;
  char llbuff[22], llbuff2[22];
unknown's avatar
unknown committed
1586
  ulonglong page= page_pos / share->block_size;
unknown's avatar
unknown committed
1587 1588
  DBUG_ENTER("check_head_page");

1589
  dir_entry= page_buff+ share->block_size - PAGE_SUFFIX_SIZE;
unknown's avatar
unknown committed
1590 1591 1592 1593 1594 1595 1596 1597
  for (row= 0 ; row < row_count ; row++)
  {
    uint pos, length, flag;
    dir_entry-= DIR_ENTRY_SIZE;
    pos= uint2korr(dir_entry);
    if (!pos)
      continue;
    length= uint2korr(dir_entry+2);
1598
    if (length < share->base.min_block_length)
unknown's avatar
unknown committed
1599 1600 1601
    {
      _ma_check_print_error(param,
                            "Page %9s:  Row %3u is too short (%d bytes)",
unknown's avatar
unknown committed
1602
                            llstr(page, llbuff), row, length);
unknown's avatar
unknown committed
1603 1604 1605 1606 1607 1608
      DBUG_RETURN(1);
    }
    flag= (uint) (uchar) page_buff[pos];
    if (flag & ~(ROW_FLAG_ALL))
      _ma_check_print_error(param,
                            "Page %9s: Row %3u has wrong flag: %d",
unknown's avatar
unknown committed
1609
                            llstr(page, llbuff), row, flag);
unknown's avatar
unknown committed
1610 1611

    DBUG_PRINT("info", ("rowid: %s  page: %lu  row: %u",
unknown's avatar
unknown committed
1612 1613
                        llstr(ma_recordpos(page, row), llbuff),
                        (ulong) page, row));
unknown's avatar
unknown committed
1614 1615 1616 1617 1618
    if (_ma_read_block_record2(info, record, page_buff+pos,
                               page_buff+pos+length))
    {
      _ma_check_print_error(param,
                            "Page %9s:  Row %3d is crashed",
unknown's avatar
unknown committed
1619
                            llstr(page, llbuff), row);
unknown's avatar
unknown committed
1620 1621 1622 1623
      if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE))
        DBUG_RETURN(1);
      continue;
    }
1624
    if (share->calc_checksum)
unknown's avatar
unknown committed
1625
    {
1626
      ha_checksum checksum= (*share->calc_checksum)(info, record);
1627 1628
      if (info->cur_row.checksum != (checksum & 255))
        _ma_check_print_error(param, "Page %9s:  Row %3d has wrong checksum",
unknown's avatar
unknown committed
1629
                              llstr(page, llbuff), row);
1630
      param->glob_crc+= checksum;
unknown's avatar
unknown committed
1631 1632 1633
    }
    if (info->cur_row.extents_count)
    {
unknown's avatar
unknown committed
1634
      uchar *extents= info->cur_row.extents;
unknown's avatar
unknown committed
1635 1636 1637
      uint i;
      /* Check that bitmap has the right marker for the found extents */
      for (i= 0 ; i < info->cur_row.extents_count ; i++)
1638
      {
1639 1640
        pgcache_page_no_t extent_page;
        uint page_count, page_type;
unknown's avatar
unknown committed
1641
        extent_page= uint5korr(extents);
1642
        page_count=  uint2korr(extents+5) & ~START_EXTENT_BIT;
unknown's avatar
unknown committed
1643 1644 1645 1646 1647 1648 1649
        extents+=    ROW_EXTENT_SIZE;
        page_type=   BLOB_PAGE;
        if (page_count & TAIL_BIT)
        {
          page_count= 1;
          page_type= TAIL_PAGE;
        }
1650 1651 1652 1653 1654
        /*
          TODO OPTIMIZE:
          Check the whole extent with one test and only do the loop if
          something is wrong (for exact error reporting)
        */
unknown's avatar
unknown committed
1655
        for ( ; page_count--; extent_page++)
unknown's avatar
unknown committed
1656 1657
        {
          uint bitmap_pattern;
unknown's avatar
unknown committed
1658
          if (_ma_check_if_right_bitmap_type(info, page_type, extent_page,
unknown's avatar
unknown committed
1659 1660 1661 1662
                                             &bitmap_pattern))
          {
            _ma_check_print_error(param,
                                  "Page %9s:  Row: %3d has an extent with wrong information in bitmap:  Page %9s  Page_type: %d  Bitmap: %d",
unknown's avatar
unknown committed
1663 1664 1665
                                  llstr(page, llbuff), row,
                                  llstr(extent_page, llbuff2),
                                  page_type, bitmap_pattern);
unknown's avatar
unknown committed
1666 1667 1668 1669
            if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE))
              DBUG_RETURN(1);
          }
        }
1670 1671
      }
    }
unknown's avatar
unknown committed
1672 1673 1674
    param->full_page_count+= info->cur_row.full_page_count;
    param->tail_count+= info->cur_row.tail_count;
    if (check_keys_in_record(param, info, extend,
unknown's avatar
unknown committed
1675
                             ma_recordpos(page, row), record))
unknown's avatar
unknown committed
1676 1677 1678 1679 1680 1681
      DBUG_RETURN(1);
  }
  DBUG_RETURN(0);
}


1682 1683 1684
/*
  Check if rows-in-block data file is consistent
*/
unknown's avatar
unknown committed
1685 1686

static int check_block_record(HA_CHECK *param, MARIA_HA *info, int extend,
unknown's avatar
unknown committed
1687
                              uchar *record)
unknown's avatar
unknown committed
1688
{
1689
  MARIA_SHARE *share= info->s;
unknown's avatar
unknown committed
1690
  my_off_t pos;
1691
  pgcache_page_no_t page;
unknown's avatar
unknown committed
1692
  uchar *page_buff, *bitmap_buff, *data;
unknown's avatar
unknown committed
1693
  char llbuff[22], llbuff2[22];
1694
  uint block_size= share->block_size;
unknown's avatar
unknown committed
1695 1696 1697
  ha_rows full_page_count, tail_count;
  my_bool full_dir;
  uint offset_page, offset;
1698

1699 1700
  LINT_INIT(full_dir);

unknown's avatar
unknown committed
1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711
  if (_ma_scan_init_block_record(info))
  {
    _ma_check_print_error(param, "got error %d when initializing scan",
                          my_errno);
    return 1;
  }
  bitmap_buff= info->scan.bitmap_buff;
  page_buff= info->scan.page_buff;
  full_page_count= tail_count= 0;
  param->full_page_count= param->tail_count= 0;
  param->used= param->link_used= 0;
unknown's avatar
unknown committed
1712
  param->splits= info->state->data_file_length / block_size;
1713

unknown's avatar
unknown committed
1714
  for (pos= 0, page= 0;
unknown's avatar
unknown committed
1715
       pos < info->state->data_file_length;
unknown's avatar
unknown committed
1716
       pos+= block_size, page++)
unknown's avatar
unknown committed
1717 1718 1719 1720 1721 1722 1723 1724 1725 1726
  {
    uint row_count, real_row_count, empty_space, page_type, bitmap_pattern;
    LINT_INIT(row_count);
    LINT_INIT(empty_space);

    if (*_ma_killed_ptr(param))
    {
      _ma_scan_end_block_record(info);
      return -1;
    }
unknown's avatar
unknown committed
1727
    if ((page % share->bitmap.pages_covered) == 0)
unknown's avatar
unknown committed
1728 1729
    {
      /* Bitmap page */
1730
      if (pagecache_read(share->pagecache,
1731
                         &info->s->bitmap.file,
unknown's avatar
unknown committed
1732
                         page, 1,
unknown's avatar
unknown committed
1733 1734 1735
                         bitmap_buff,
                         PAGECACHE_PLAIN_PAGE,
                         PAGECACHE_LOCK_LEFT_UNLOCKED, 0) == 0)
1736
      {
unknown's avatar
unknown committed
1737 1738
        _ma_check_print_error(param,
                              "Page %9s:  Got error: %d when reading datafile",
unknown's avatar
unknown committed
1739
                              llstr(page, llbuff), my_errno);
unknown's avatar
unknown committed
1740
        goto err;
1741
      }
unknown's avatar
unknown committed
1742 1743 1744
      param->used+= block_size;
      param->link_used+= block_size;
      continue;
1745
    }
unknown's avatar
unknown committed
1746
    /* Skip pages marked as empty in bitmap */
1747
    offset_page= (uint) ((page % share->bitmap.pages_covered) -1) * 3;
unknown's avatar
unknown committed
1748 1749 1750 1751
    offset= offset_page & 7;
    data= bitmap_buff + offset_page / 8;
    bitmap_pattern= uint2korr(data);
    if (!((bitmap_pattern >> offset) & 7))
1752
    {
unknown's avatar
unknown committed
1753 1754 1755 1756 1757
      param->empty+= block_size;
      param->del_blocks++;
      continue;
    }

1758
    if (pagecache_read(share->pagecache,
unknown's avatar
unknown committed
1759
                       &info->dfile,
unknown's avatar
unknown committed
1760
                       page, 1,
unknown's avatar
unknown committed
1761
                       page_buff,
1762
                       share->page_type,
unknown's avatar
unknown committed
1763
                       PAGECACHE_LOCK_LEFT_UNLOCKED, 0) == 0)
unknown's avatar
unknown committed
1764 1765 1766
    {
      _ma_check_print_error(param,
                            "Page %9s:  Got error: %d when reading datafile",
unknown's avatar
unknown committed
1767
                            llstr(page, llbuff), my_errno);
unknown's avatar
unknown committed
1768 1769 1770 1771 1772 1773
      goto err;
    }
    page_type= page_buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK;
    if (page_type == UNALLOCATED_PAGE || page_type >= MAX_PAGE_TYPE)
    {
      _ma_check_print_error(param,
unknown's avatar
unknown committed
1774 1775
                            "Page: %9s  Found wrong page type %d",
                            llstr(page, llbuff), page_type);
unknown's avatar
unknown committed
1776 1777
      if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE))
        goto err;
1778
      continue;
unknown's avatar
unknown committed
1779 1780 1781 1782
    }
    switch ((enum en_page_type) page_type) {
    case UNALLOCATED_PAGE:
    case MAX_PAGE_TYPE:
1783
    default:
1784
      DBUG_ASSERT(0);                           /* Impossible */
unknown's avatar
unknown committed
1785 1786
      break;
    case HEAD_PAGE:
1787
      row_count= ((uchar*) page_buff)[DIR_COUNT_OFFSET];
unknown's avatar
unknown committed
1788
      empty_space= uint2korr(page_buff + EMPTY_SPACE_OFFSET);
unknown's avatar
unknown committed
1789
      param->used+= block_size - empty_space;
unknown's avatar
unknown committed
1790 1791
      param->link_used+= (PAGE_HEADER_SIZE + PAGE_SUFFIX_SIZE +
                          row_count * DIR_ENTRY_SIZE);
unknown's avatar
unknown committed
1792 1793
      if (empty_space < share->bitmap.sizes[3])
        param->lost+= empty_space;
unknown's avatar
unknown committed
1794 1795 1796
      full_dir= row_count == MAX_ROWS_PER_PAGE;
      break;
    case TAIL_PAGE:
1797
      row_count= ((uchar*) page_buff)[DIR_COUNT_OFFSET];
unknown's avatar
unknown committed
1798
      empty_space= uint2korr(page_buff + EMPTY_SPACE_OFFSET);
unknown's avatar
unknown committed
1799
      param->used+= block_size - empty_space;
unknown's avatar
unknown committed
1800 1801 1802
      param->link_used+= (PAGE_HEADER_SIZE + PAGE_SUFFIX_SIZE +
                          row_count * DIR_ENTRY_SIZE);
      full_dir= row_count == MAX_ROWS_PER_PAGE;
unknown's avatar
unknown committed
1803 1804
      if (empty_space < share->bitmap.sizes[6])
        param->lost+= empty_space;
unknown's avatar
unknown committed
1805 1806 1807 1808 1809 1810 1811 1812
      break;
    case BLOB_PAGE:
      full_page_count++;
      full_dir= 0;
      empty_space= block_size;                  /* for error reporting */
      param->link_used+= (LSN_SIZE + PAGE_TYPE_SIZE);
      param->used+= block_size;
      break;
1813
    }
unknown's avatar
unknown committed
1814
    if (_ma_check_bitmap_data(info, page_type, page,
unknown's avatar
unknown committed
1815 1816 1817
                              full_dir ? 0 : empty_space,
                              &bitmap_pattern))
    {
1818 1819
      if (bitmap_pattern == ~(uint) 0)
        _ma_check_print_error(param,
unknown's avatar
unknown committed
1820 1821
                              "Page %9s: Wrong bitmap for data on page",
                              llstr(page, llbuff));
1822
      else
1823 1824
        _ma_check_print_error(param,
                              "Page %9s:  Wrong data in bitmap.  Page_type: %d  empty_space: %u  Bitmap-bits: %d",
unknown's avatar
unknown committed
1825 1826
                              llstr(page, llbuff), page_type,
                              empty_space, bitmap_pattern);
unknown's avatar
unknown committed
1827 1828 1829 1830 1831 1832 1833 1834 1835 1836
      if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE))
        goto err;
    }
    if ((enum en_page_type) page_type == BLOB_PAGE)
      continue;
    param->empty+= empty_space;
    if (check_page_layout(param, info, pos, page_buff, row_count,
                          empty_space, &real_row_count))
      goto err;
    if ((enum en_page_type) page_type == TAIL_PAGE)
1837
    {
unknown's avatar
unknown committed
1838 1839
      tail_count+= real_row_count;
      continue;
1840
    }
unknown's avatar
unknown committed
1841 1842 1843
    if (check_head_page(param, info, record, extend, pos, page_buff,
                        row_count))
      goto err;
1844
  }
unknown's avatar
unknown committed
1845

1846 1847
  /* Verify that rest of bitmap is zero */

unknown's avatar
unknown committed
1848
  if (page % share->bitmap.pages_covered)
1849 1850 1851
  {
    /* Not at end of bitmap */
    uint bitmap_pattern;
1852
    offset_page= (uint) ((page % share->bitmap.pages_covered) -1) * 3;
1853 1854 1855 1856
    offset= offset_page & 7;
    data= bitmap_buff + offset_page / 8;
    bitmap_pattern= uint2korr(data);
    if (((bitmap_pattern >> offset)) ||
1857 1858
        (data + 2 < bitmap_buff + share->bitmap.total_size &&
         _ma_check_if_zero(data+2, bitmap_buff + share->bitmap.total_size -
1859 1860 1861
                           data - 2)))
    {
      ulonglong bitmap_page;
unknown's avatar
unknown committed
1862
      bitmap_page= page / share->bitmap.pages_covered;
1863
      bitmap_page*= share->bitmap.pages_covered;
1864

unknown's avatar
unknown committed
1865 1866 1867
      _ma_check_print_error(param,
                            "Bitmap at page %s has pages reserved outside of "
                            "data file length",
1868
                            llstr(bitmap_page, llbuff));
1869
      DBUG_EXECUTE("bitmap", _ma_print_bitmap(&share->bitmap, bitmap_buff,
1870 1871 1872 1873
                                              bitmap_page););
    }
  }

unknown's avatar
unknown committed
1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892
  _ma_scan_end_block_record(info);

  if (full_page_count != param->full_page_count)
    _ma_check_print_error(param, "Full page count read through records was %s but we found %s pages while scanning table",
                          llstr(param->full_page_count, llbuff),
                          llstr(full_page_count, llbuff2));
  if (tail_count != param->tail_count)
    _ma_check_print_error(param, "Tail count read through records was %s but we found %s tails while scanning table",
                          llstr(param->tail_count, llbuff),
                          llstr(tail_count, llbuff2));

  return param->error_printed != 0;

err:
  _ma_scan_end_block_record(info);
  return 1;
}


1893
/* Check that record-link is ok */
unknown's avatar
unknown committed
1894

1895
/*
  SYNOPSIS
    maria_chk_data_link()
    param     Check state; the counters are reset here and then filled in
              by the check function for the row format of the table
    info      Table handler
    extend    Passed on to the row format specific check function. If not
              set, the checksum for each key is also compared with the
              checksum collected from the records.

  NOTES
    Progress, statistics (with T_INFO) and warnings are written to stdout /
    through _ma_check_print_error() and _ma_check_print_warning().

  RETURN
    0   ok
    1   error found. If the row format specific check failed,
        T_RETRY_WITHOUT_QUICK is also set in param->testflag.
   -1   could not allocate the record buffer
*/

int maria_chk_data_link(HA_CHECK *param, MARIA_HA *info, my_bool extend)
{
  MARIA_SHARE *share= info->s;
  int error;
  uchar *record;
  char llbuff[22],llbuff2[22],llbuff3[22];
  DBUG_ENTER("maria_chk_data_link");

  if (!(param->testflag & T_SILENT))
  {
    if (extend)
      puts("- check records and index references");
    else
      puts("- check record links");
  }

  if (!(record= (uchar*) my_malloc(share->base.pack_reclength,MYF(0))))
  {
    _ma_check_print_error(param,"Not enough memory for record");
    DBUG_RETURN(-1);
  }
  param->records= param->del_blocks= 0;
  param->used= param->link_used= param->splits= param->del_length= 0;
  param->lost= 0;
  param->tmp_record_checksum= param->glob_crc= 0;
  param->err_count= 0;

  error= 0;
  param->empty= share->pack.header_length;

  bzero((char*) param->tmp_key_crc,
        share->base.keys * sizeof(param->tmp_key_crc[0]));

  switch (share->data_file_type) {
  case BLOCK_RECORD:
    error= check_block_record(param, info, extend, record);
    break;
  case STATIC_RECORD:
    error= check_static_record(param, info, extend, record);
    break;
  case DYNAMIC_RECORD:
    error= check_dynamic_record(param, info, extend, record);
    break;
  case COMPRESSED_RECORD:
    error= check_compressed_record(param, info, extend, record);
    break;
  } /* switch */

  /* Any non zero result (also -1) ends up as return value 1 */
  if (error)
    goto err;

  if (param->testflag & T_WRITE_LOOP)
  {
    VOID(fputs("          \r",stdout)); VOID(fflush(stdout));
  }
  /* Only the first mismatch in the following chain is reported */
  if (param->records != info->state->records)
  {
    _ma_check_print_error(param,
                          "Record-count is not ok; found %-10s  Should be: %s",
                          llstr(param->records,llbuff),
                          llstr(info->state->records,llbuff2));
    error=1;
  }
  else if (param->record_checksum &&
           param->record_checksum != param->tmp_record_checksum)
  {
    _ma_check_print_error(param,
                          "Key pointers and record positions doesn't match");
    error=1;
  }
  else if (param->glob_crc != info->state->checksum &&
           (share->options &
            (HA_OPTION_CHECKSUM | HA_OPTION_COMPRESS_RECORD)))
  {
    _ma_check_print_warning(param,
                            "Record checksum is not the same as checksum stored in the index file");
    error=1;
  }
  else if (!extend)
  {
    uint key;
    for (key=0 ; key < share->base.keys;  key++)
    {
      /* Fulltext and spatial keys are not compared */
      if (param->tmp_key_crc[key] != param->key_crc[key] &&
          !(share->keyinfo[key].flag & (HA_FULLTEXT | HA_SPATIAL)))
      {
        _ma_check_print_error(param,"Checksum for key: %2u doesn't match checksum for records",
                              key+1);
        error=1;
      }
    }
  }

  if (param->del_length != info->state->empty)
  {
    _ma_check_print_warning(param,
                            "Found %s deleted space.   Should be %s",
                            llstr(param->del_length,llbuff2),
                            llstr(info->state->empty,llbuff));
  }
  /* Skip following checks for BLOCK RECORD as they don't make any sense */
  if (share->data_file_type != BLOCK_RECORD)
  {
    if (param->used + param->empty + param->del_length !=
        info->state->data_file_length)
    {
      _ma_check_print_warning(param,
                              "Found %s record data and %s unused data and %s deleted data",
                              llstr(param->used, llbuff),
                              llstr(param->empty,llbuff2),
                              llstr(param->del_length,llbuff3));
      _ma_check_print_warning(param,
                              "Total %s   Should be: %s",
                              llstr((param->used+param->empty +
                                     param->del_length), llbuff),
                              llstr(info->state->data_file_length,llbuff2));
    }
    if (param->del_blocks != info->state->del)
    {
      _ma_check_print_warning(param,
                              "Found %10s deleted blocks       Should be: %s",
                              llstr(param->del_blocks,llbuff),
                              llstr(info->state->del,llbuff2));
    }
    if (param->splits != share->state.split)
    {
      _ma_check_print_warning(param,
                              "Found %10s parts                Should be: %s parts",
                              llstr(param->splits, llbuff),
                              llstr(share->state.split,llbuff2));
    }
  }
  if (param->testflag & T_INFO)
  {
    if (param->warning_printed || param->error_printed)
      puts("");
    if (param->used != 0 && ! param->error_printed)
    {
      if (param->records)
      {
        /* param->records is known to be != 0 in all divisions below */
        printf("Records:%18s    M.recordlength:%9lu   Packed:%14.0f%%\n",
               llstr(param->records,llbuff),
               (ulong)((param->used - param->link_used)/param->records),
               (share->base.blobs ? 0.0 :
                (ulonglong2double((ulonglong) share->base.reclength *
                                  param->records)-
                 my_off_t2double(param->used))/
                ulonglong2double((ulonglong) share->base.reclength *
                                 param->records)*100.0));
        printf("Recordspace used:%9.0f%%   Empty space:%12d%%  Blocks/Record: %6.2f\n",
               (ulonglong2double(param->used - param->link_used)/
                ulonglong2double(param->used-param->link_used+param->empty)*100.0),
               (int) (ulonglong2double(param->del_length+param->empty)/
                      my_off_t2double(param->used)*100.0),
               ulonglong2double(param->splits - param->del_blocks) /
               param->records);
      }
      else
        printf("Records:%18s\n", "0");
    }
    printf("Record blocks:%12s    Delete blocks:%10s\n",
           llstr(param->splits - param->del_blocks, llbuff),
           llstr(param->del_blocks, llbuff2));
    printf("Record data:  %12s    Deleted data: %10s\n",
           llstr(param->used - param->link_used,llbuff),
           llstr(param->del_length, llbuff2));
    printf("Empty space:  %12s    Linkdata:     %10s\n",
           llstr(param->empty, llbuff),llstr(param->link_used, llbuff2));
    /* End the line, so that following output doesn't continue on it */
    if (param->lost)
      printf("Lost space:   %12s\n", llstr(param->lost, llbuff));
  }
  my_free((uchar*) record,MYF(0));
  DBUG_RETURN (error);

err:
  my_free((uchar*) record,MYF(0));
  param->testflag|=T_RETRY_WITHOUT_QUICK;
  DBUG_RETURN(1);
} /* maria_chk_data_link */


unknown's avatar
unknown committed
2077 2078 2079 2080 2081 2082 2083 2084 2085
/**
  Prepares a table for a repair or index sort: flushes pages, records durably
  in the table that it is undergoing the operation (if that op crashes, that
  info will serve for Recovery and the user).

  If we start overwriting the index file, and crash then, old REDOs will
  be tried and fail. To prevent that, we bump skip_redo_lsn, and thus we have
  to flush and sync pages so that old REDOs can be skipped.
  If this is not a bulk insert, which Recovery can handle gracefully (by
2086
  truncating files, see UNDO_BULK_INSERT) we also mark the table
unknown's avatar
unknown committed
2087 2088
  crashed-on-repair, so that user knows it has to re-repair. If bulk insert we
  shouldn't mark it crashed-on-repair, because if we did this, the UNDO phase
2089
  would skip the table (UNDO_BULK_INSERT would not be applied),
unknown's avatar
unknown committed
2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138
  and maria_chk would not improve that.
  If this is an OPTIMIZE which merely sorts index, we need to do the same
  too: old REDOs should not apply to the new index file.
  Only the flush is needed when in maria_chk which is not crash-safe.

  @param  info             table
  @param  param            repair parameters
  @param  discard_index    if index pages can be thrown away
*/

static my_bool protect_against_repair_crash(MARIA_HA *info,
                                            const HA_CHECK *param,
                                            my_bool discard_index)
{
  MARIA_SHARE *share= info->s;

  /*
    The writes below are not only there for recovery:
    - repair sometimes uses the physical size of the data file, so the file
    must be flushed for that size to be exact
    - maria_open(HA_OPEN_COPY) will want to read the state from disk, so the
    state must be written.
  */
  if (_ma_flush_table_files(info, MARIA_FLUSH_DATA | MARIA_FLUSH_INDEX,
                            FLUSH_FORCE_WRITE,
                            discard_index ? FLUSH_IGNORE_CHANGED :
                            FLUSH_FORCE_WRITE))
    return TRUE;
  if (share->changed && _ma_state_info_write(share, 1|2|4))
    return TRUE;

  /* The rest is not needed in maria_chk */
  if (!maria_multi_threaded || !share->base.born_transactional)
    return FALSE;

  if (!(param->testflag & T_NO_CREATE_RENAME_LSN))
  {
    /* we can only get here for a transactional table */
    maria_mark_crashed_on_repair(info);
    if (_ma_state_info_write(share, 1|4))
      return TRUE;
  }
  /* The state LSNs are only updated when the log is usable */
  if (translog_status == TRANSLOG_OK &&
      _ma_update_state_lsns(share, translog_get_horizon(), FALSE, FALSE))
    return TRUE;

  return _ma_sync_table_files(info) ? TRUE : FALSE;
}


2139 2140 2141 2142
/**
   @brief Initialize variables for repair
*/

2143 2144 2145 2146
static int initialize_variables_for_repair(HA_CHECK *param,
                                           MARIA_SORT_INFO *sort_info,
                                           MARIA_SORT_PARAM *sort_param,
                                           MARIA_HA *info,
2147
                                           my_bool rep_quick)
2148
{
2149 2150 2151 2152
  MARIA_SHARE *share= info->s;

  bzero((char*) sort_info,  sizeof(*sort_info));
  bzero((char*) sort_param, sizeof(*sort_param));
2153 2154

  param->testflag|= T_REP;                     /* for easy checking */
2155
  if (share->options & (HA_OPTION_CHECKSUM | HA_OPTION_COMPRESS_RECORD))
2156 2157
    param->testflag|= T_CALC_CHECKSUM;
  param->glob_crc= 0;
2158 2159 2160 2161
  if (rep_quick)
    param->testflag|= T_QUICK;
  else
    param->testflag&= ~T_QUICK;
2162
  param->org_key_map= share->state.key_map;
2163 2164

  sort_param->sort_info= sort_info;
2165
  sort_param->fix_datafile= ! rep_quick;
2166 2167 2168 2169
  sort_param->calc_checksum= test(param->testflag & T_CALC_CHECKSUM);
  sort_info->info= sort_info->new_info= info;
  sort_info->param= param;
  set_data_file_type(sort_info, info->s);
2170
  sort_info->org_data_file_type= share->data_file_type;
2171 2172 2173 2174

  bzero(&info->rec_cache, sizeof(info->rec_cache));
  info->rec_cache.file= info->dfile.file;
  info->update= (short) (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED);
2175

unknown's avatar
unknown committed
2176 2177 2178
  if (protect_against_repair_crash(info, param, !test(param->testflag &
                                                      T_CREATE_MISSING_KEYS)))
    return 1;
2179

unknown's avatar
unknown committed
2180
  /* calculate max_records */
2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192
  sort_info->filelength= my_seek(info->dfile.file, 0L, MY_SEEK_END, MYF(0));
  if ((param->testflag & T_CREATE_MISSING_KEYS) ||
      sort_info->org_data_file_type == COMPRESSED_RECORD)
    sort_info->max_records= info->state->records;
  else
  {
    ulong rec_length;
    rec_length= max(share->base.min_pack_length,
                    share->base.min_block_length);
    sort_info->max_records= (ha_rows) (sort_info->filelength / rec_length);
  }
  return 0;
2193 2194 2195
}


2196 2197 2198 2199
/*
  Recover old table by reading each record and writing all keys

  NOTES
2200 2201 2202 2203 2204
    Save new datafile-name in temp_filename.
    We overwrite the index file as we go (writekeys() for example), so if we
    crash during this the table is unusable and user (or Recovery in the
    future) must repeat the REPAIR/OPTIMIZE operation. We could use a
    temporary index file in the future (drawback: more disk space).
2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216

  IMPLEMENTATION (for hard repair with block format)
   - Create new, unrelated MARIA_HA of the table
   - Create new datafile and associate it with new handler
   - Reset all statistic information in new handler
   - Copy all data to new handler with normal write operations
   - Move state of new handler to old handler
   - Close new handler
   - Close data file in old handler
   - Rename old data file to new data file.
   - Reopen data file in old handler
*/
2217 2218

int maria_repair(HA_CHECK *param, register MARIA_HA *info,
2219
                 char *name, my_bool rep_quick)
2220
{
2221
  int error, got_error;
2222 2223 2224 2225
  uint i;
  ha_rows start_records,new_header_length;
  my_off_t del;
  File new_file;
2226
  MARIA_SHARE *share= info->s;
2227 2228 2229
  char llbuff[22],llbuff2[22];
  MARIA_SORT_INFO sort_info;
  MARIA_SORT_PARAM sort_param;
unknown's avatar
unknown committed
2230 2231
  my_bool block_record, scan_inited= 0,
    reenable_logging= share->now_transactional;
2232
  enum data_file_type org_data_file_type= share->data_file_type;
unknown's avatar
unknown committed
2233 2234
  myf sync_dir= ((share->now_transactional && !share->temporary) ?
                 MY_SYNC_DIR : 0);
2235 2236
  DBUG_ENTER("maria_repair");

2237
  got_error= 1;
2238
  new_file= -1;
2239
  start_records= info->state->records;
2240 2241 2242
  if (!(param->testflag & T_SILENT))
  {
    printf("- recovering (with keycache) MARIA-table '%s'\n",name);
2243
    printf("Data records: %s\n", llstr(start_records, llbuff));
2244 2245
  }

2246 2247
  if (initialize_variables_for_repair(param, &sort_info, &sort_param, info,
                                      rep_quick))
unknown's avatar
unknown committed
2248
    goto err;
unknown's avatar
unknown committed
2249

unknown's avatar
unknown committed
2250
  if (reenable_logging)
unknown's avatar
unknown committed
2251
    _ma_tmp_disable_logging_for_table(info, 0);
unknown's avatar
unknown committed
2252

2253
  sort_param.current_filepos= sort_param.filepos= new_header_length=
2254
    ((param->testflag & T_UNPACK) ? 0L : share->pack.header_length);
2255

2256 2257 2258
  if (!rep_quick)
  {
    /* Get real path for data file */
unknown's avatar
unknown committed
2259 2260 2261 2262 2263
    if ((new_file= my_create(fn_format(param->temp_filename,
                                       share->data_file_name, "",
                                       DATA_TMP_EXT, 2+4),
                             0,param->tmpfile_createflag,
                             MYF(0))) < 0)
2264 2265 2266 2267 2268
    {
      _ma_check_print_error(param,"Can't create new tempfile: '%s'",
			   param->temp_filename);
      goto err;
    }
2269
    if (new_header_length &&
unknown's avatar
unknown committed
2270 2271
        maria_filecopy(param, new_file, info->dfile.file, 0L,
                       new_header_length, "datafile-header"))
2272
      goto err;
2273
    share->state.dellink= HA_OFFSET_ERROR;
2274
    info->rec_cache.file= new_file;             /* For sort_delete_record */
2275
    if (share->data_file_type == BLOCK_RECORD ||
2276
        (param->testflag & T_UNPACK))
2277
    {
2278
      if (create_new_data_handle(&sort_param, new_file))
2279
        goto err;
2280
      sort_info.new_info->rec_cache.file= new_file;
2281 2282 2283
    }
  }

2284 2285
  block_record= sort_info.new_info->s->data_file_type == BLOCK_RECORD;

2286 2287 2288 2289 2290 2291 2292
  if (org_data_file_type != BLOCK_RECORD)
  {
    /* We need a read buffer to read rows in big blocks */
    if (init_io_cache(&param->read_cache, info->dfile.file,
                      (uint) param->read_buffer_length,
                      READ_CACHE, share->pack.header_length, 1, MYF(MY_WME)))
      goto err;
2293
  }
2294 2295 2296 2297
  if (sort_info.new_info->s->data_file_type != BLOCK_RECORD)
  {
    /* When writing to not block records, we need a write buffer */
    if (!rep_quick)
2298 2299
    {
      if (init_io_cache(&sort_info.new_info->rec_cache, new_file,
2300 2301
                        (uint) param->write_buffer_length,
                        WRITE_CACHE, new_header_length, 1,
2302
                        MYF(MY_WME | MY_WAIT_IF_FULL) & param->myf_rw))
2303
        goto err;
2304 2305
      sort_info.new_info->opt_flag|=WRITE_CACHE_USED;
    }
2306
  }
2307
  else if (block_record)
2308 2309 2310 2311 2312 2313
  {
    scan_inited= 1;
    if (maria_scan_init(sort_info.info))
      goto err;
  }

2314
  if (!(sort_param.record=(uchar*) my_malloc((uint) share->base.pack_reclength,
2315 2316
					   MYF(0))) ||
      _ma_alloc_buffer(&sort_param.rec_buff, &sort_param.rec_buff_size,
2317
                       share->base.default_rec_buff_size))
2318 2319 2320
  {
    _ma_check_print_error(param, "Not enough memory for extra record");
    goto err;
2321
  }
2322

2323 2324
  sort_param.read_cache=param->read_cache;
  sort_param.pos=sort_param.max_pos=share->pack.header_length;
2325
  param->read_cache.end_of_file= sort_info.filelength;
2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336
  sort_param.master=1;
  sort_info.max_records= ~(ha_rows) 0;

  del=info->state->del;
  info->state->records=info->state->del=share->state.split=0;
  info->state->empty=0;

  /*
    Clear all keys. Note that all key blocks allocated until now remain
    "dead" parts of the key file. (Bug #4692)
  */
2337
  for (i=0 ; i < share->base.keys ; i++)
2338 2339 2340
    share->state.key_root[i]= HA_OFFSET_ERROR;

  /* Drop the delete chain. */
unknown's avatar
unknown committed
2341
  share->state.key_del=  HA_OFFSET_ERROR;
2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356

  /*
    If requested, activate (enable) all keys in key_map. In this case,
    all indexes will be (re-)built.
  */
  if (param->testflag & T_CREATE_MISSING_KEYS)
    maria_set_all_keys_active(share->state.key_map, share->base.keys);

  info->state->key_file_length=share->base.keystart;

  maria_lock_memory(param);			/* Everything is alloced */

  /* Re-create all keys, which are set in key_map. */
  while (!(error=sort_get_next_record(&sort_param)))
  {
2357 2358 2359
    if (block_record && _ma_sort_write_record(&sort_param))
      goto err;

2360
    if (writekeys(&sort_param))
2361 2362 2363
    {
      if (my_errno != HA_ERR_FOUND_DUPP_KEY)
	goto err;
unknown's avatar
unknown committed
2364
      DBUG_DUMP("record",(uchar*) sort_param.record,share->base.pack_reclength);
2365 2366
      _ma_check_print_info(param,
                           "Duplicate key %2d for record at %10s against new record at %10s",
2367 2368
			  info->errkey+1,
			  llstr(sort_param.start_recpos,llbuff),
unknown's avatar
unknown committed
2369
			  llstr(info->dup_key_pos,llbuff2));
2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383
      if (param->testflag & T_VERBOSE)
      {
	VOID(_ma_make_key(info,(uint) info->errkey,info->lastkey,
			  sort_param.record,0L));
	_ma_print_key(stdout,share->keyinfo[info->errkey].seg,info->lastkey,
		      USE_WHOLE_KEY);
      }
      sort_info.dupp++;
      if ((param->testflag & (T_FORCE_UNIQUENESS|T_QUICK)) == T_QUICK)
      {
        param->testflag|=T_RETRY_WITHOUT_QUICK;
	param->error_printed=1;
	goto err;
      }
unknown's avatar
unknown committed
2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394
      /* purecov: begin tested */
      if (block_record)
      {
        sort_info.new_info->state->records--;
        if ((*sort_info.new_info->s->write_record_abort)(sort_info.new_info))
        {
          _ma_check_print_error(param,"Couldn't delete duplicate row");
          goto err;
        }
      }
      /* purecov: end */
2395
      continue;
2396
    }
2397 2398 2399 2400 2401 2402 2403
    if (!block_record)
    {
      if (_ma_sort_write_record(&sort_param))
        goto err;
      /* Filepos is pointer to where next row will be stored */
      sort_param.current_filepos= sort_param.filepos;
    }
2404
  }
2405
  if (error > 0 || maria_write_data_suffix(&sort_info, !rep_quick) ||
2406 2407
      flush_io_cache(&sort_info.new_info->rec_cache) ||
      param->read_cache.error < 0)
2408 2409 2410 2411 2412 2413
    goto err;

  if (param->testflag & T_WRITE_LOOP)
  {
    VOID(fputs("          \r",stdout)); VOID(fflush(stdout));
  }
unknown's avatar
unknown committed
2414
  if (my_chsize(share->kfile.file, info->state->key_file_length, 0, MYF(0)))
2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429
  {
    _ma_check_print_warning(param,
			   "Can't change size of indexfile, error: %d",
			   my_errno);
    goto err;
  }

  if (rep_quick && del+sort_info.dupp != info->state->del)
  {
    _ma_check_print_error(param,"Couldn't fix table with quick recovery: Found wrong number of deleted records");
    _ma_check_print_error(param,"Run recovery again without -q");
    param->retry_repair=1;
    param->testflag|=T_RETRY_WITHOUT_QUICK;
    goto err;
  }
2430

2431 2432 2433
  if (param->testflag & T_SAFE_REPAIR)
  {
    /* Don't repair if we loosed more than one row */
2434
    if (sort_info.new_info->state->records+1 < start_records)
2435 2436 2437 2438 2439 2440
    {
      info->state->records=start_records;
      goto err;
    }
  }

2441 2442
  VOID(end_io_cache(&sort_info.new_info->rec_cache));
  info->opt_flag&= ~WRITE_CACHE_USED;
unknown's avatar
unknown committed
2443 2444 2445 2446 2447 2448

  /*
    As we have read the data file (sort_get_next_record()) we may have
    cached, non-changed blocks of it in the page cache. We must throw them
    away as we are going to close their descriptor ('new_file'). We also want
    to flush any index block, so that it is ready for the upcoming sync.
unknown's avatar
unknown committed
2449
  */
unknown's avatar
unknown committed
2450
  if (_ma_flush_table_files_before_swap(param, info))
2451 2452
    goto err;

2453 2454
  if (!rep_quick)
  {
2455
    sort_info.new_info->state->data_file_length= sort_param.filepos;
2456 2457 2458 2459 2460 2461 2462 2463
    if (sort_info.new_info != sort_info.info)
    {
      MARIA_STATE_INFO save_state= sort_info.new_info->s->state;
      if (maria_close(sort_info.new_info))
      {
        _ma_check_print_error(param, "Got error %d on close", my_errno);
        goto err;
      }
2464
      copy_data_file_state(&share->state, &save_state);
2465
      new_file= -1;
2466
      sort_info.new_info= info;
2467
    }
2468
    share->state.version=(ulong) time((time_t*) 0);	/* Force reopen */
2469 2470 2471 2472

    /* Replace the actual file with the temporary file */
    if (new_file >= 0)
      my_close(new_file, MYF(MY_WME));
unknown's avatar
unknown committed
2473 2474
    new_file= -1;
    change_data_file_descriptor(info, -1);
2475 2476 2477
    if (maria_change_to_newfile(share->data_file_name,MARIA_NAME_DEXT,
                                DATA_TMP_EXT,
                                (param->testflag & T_BACKUP_DATA ?
2478 2479
                                 MYF(MY_REDEL_MAKE_BACKUP): MYF(0)) |
                                sync_dir) ||
2480 2481 2482 2483
        _ma_open_datafile(info, share, -1))
    {
      goto err;
    }
2484 2485 2486
  }
  else
  {
2487
    info->state->data_file_length= sort_param.max_pos;
2488 2489
  }
  if (param->testflag & T_CALC_CHECKSUM)
2490
    info->state->checksum= param->glob_crc;
2491 2492 2493 2494 2495 2496

  if (!(param->testflag & T_SILENT))
  {
    if (start_records != info->state->records)
      printf("Data records: %s\n", llstr(info->state->records,llbuff));
  }
unknown's avatar
unknown committed
2497 2498 2499 2500
  if (sort_info.dupp)
    _ma_check_print_warning(param,
                            "%s records have been removed",
                            llstr(sort_info.dupp,llbuff));
2501

unknown's avatar
unknown committed
2502
  got_error= 0;
2503 2504
  /* If invoked by external program that uses thr_lock */
  if (&share->state.state != info->state)
2505
    memcpy(&share->state.state, info->state, sizeof(*info->state));
2506 2507

err:
2508 2509 2510
  if (scan_inited)
    maria_scan_end(sort_info.info);

unknown's avatar
unknown committed
2511
  VOID(end_io_cache(&param->read_cache));
2512
  VOID(end_io_cache(&sort_info.new_info->rec_cache));
unknown's avatar
unknown committed
2513
  info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED);
2514
  sort_info.new_info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED);
unknown's avatar
unknown committed
2515
  /* this below could fail, shouldn't we detect error? */
2516 2517 2518 2519 2520
  if (got_error)
  {
    if (! param->error_printed)
      _ma_check_print_error(param,"%d for record at pos %s",my_errno,
		  llstr(sort_param.start_recpos,llbuff));
unknown's avatar
unknown committed
2521
    (void)_ma_flush_table_files_before_swap(param, info);
2522 2523
    if (sort_info.new_info && sort_info.new_info != sort_info.info)
    {
2524
      unuse_data_file_descriptor(sort_info.new_info);
2525 2526
      maria_close(sort_info.new_info);
    }
2527 2528 2529
    if (new_file >= 0)
    {
      VOID(my_close(new_file,MYF(0)));
unknown's avatar
unknown committed
2530
      VOID(my_delete(param->temp_filename, MYF(MY_WME)));
2531 2532 2533
    }
    maria_mark_crashed_on_repair(info);
  }
unknown's avatar
unknown committed
2534 2535
  /* If caller had disabled logging it's not up to us to re-enable it */
  if (reenable_logging)
2536
    _ma_reenable_logging_for_table(info, FALSE);
unknown's avatar
unknown committed
2537

unknown's avatar
unknown committed
2538
  my_free(sort_param.rec_buff, MYF(MY_ALLOW_ZERO_PTR));
2539 2540
  my_free(sort_param.record,MYF(MY_ALLOW_ZERO_PTR));
  my_free(sort_info.buff,MYF(MY_ALLOW_ZERO_PTR));
unknown's avatar
unknown committed
2541 2542
  if (!got_error && (param->testflag & T_UNPACK))
    restore_data_file_type(share);
2543
  share->state.changed|= (STATE_NOT_OPTIMIZED_KEYS | STATE_NOT_SORTED_PAGES |
unknown's avatar
unknown committed
2544 2545 2546
			  STATE_NOT_ANALYZED | STATE_NOT_ZEROFILLED);
  if (!rep_quick)
    share->state.changed&= ~(STATE_NOT_OPTIMIZED_ROWS | STATE_NOT_MOVABLE);
2547 2548 2549 2550
  DBUG_RETURN(got_error);
}


unknown's avatar
unknown committed
2551
/* Update keyfile when doing repair */
2552

2553
/*
  Insert the keys of the current sort record into every active index.

  The record is taken from sort_param->record and the keys point at
  sort_param->current_filepos.

  If an insert fails with HA_ERR_FOUND_DUPP_KEY, info->errkey is set to the
  failing key number and the keys already written for this record (key
  numbers below the failing one) are deleted again. On any failure the row
  checksum is subtracted from glob_crc if checksums are being calculated.

  RETURN
    0   ok
    -1  error (my_errno is left as set by the failing call)
*/

static int writekeys(MARIA_SORT_PARAM *sort_param)
{
  MARIA_HA *info= sort_param->sort_info->info;
  MARIA_SHARE *share= info->s;
  uchar *record= sort_param->record;
  my_off_t recpos= sort_param->current_filepos;
  /* Key buffer located max_key_length bytes into info->lastkey */
  uchar *key_buff= info->lastkey + share->base.max_key_length;
  uint keynr;
  uint key_length;
  DBUG_ENTER("writekeys");

  for (keynr= 0; keynr < share->base.keys; keynr++)
  {
    if (!maria_is_key_active(share->state.key_map, keynr))
      continue;

    if (share->keyinfo[keynr].flag & HA_FULLTEXT)
    {
      if (_ma_ft_add(info, keynr, key_buff, record, recpos))
        goto err;
      continue;
    }
#ifdef HAVE_SPATIAL
    if (share->keyinfo[keynr].flag & HA_SPATIAL)
    {
      key_length= _ma_make_key(info, keynr, key_buff, record, recpos);
      if (maria_rtree_insert(info, keynr, key_buff, key_length))
        goto err;
      continue;
    }
#endif /*HAVE_SPATIAL*/
    key_length= _ma_make_key(info, keynr, key_buff, record, recpos);
    if (_ma_ck_write(info, keynr, key_buff, key_length))
      goto err;
  }
  DBUG_RETURN(0);

 err:
  if (my_errno == HA_ERR_FOUND_DUPP_KEY)
  {
    info->errkey= (int) keynr;                  /* This key was found */
    /* Walk back over the keys written before the failing one */
    while (keynr-- > 0)
    {
      if (!maria_is_key_active(share->state.key_map, keynr))
        continue;

      if (share->keyinfo[keynr].flag & HA_FULLTEXT)
      {
        if (_ma_ft_del(info, keynr, key_buff, record, recpos))
          break;
      }
      else
      {
        key_length= _ma_make_key(info, keynr, key_buff, record, recpos);
        if (_ma_ck_delete(info, keynr, key_buff, key_length))
          break;
      }
    }
  }
  /* Remove checksum that was added to glob_crc in sort_get_next_record */
  if (sort_param->calc_checksum)
    sort_param->sort_info->param->glob_crc-= info->cur_row.checksum;
  DBUG_PRINT("error",("errno: %d",my_errno));
  DBUG_RETURN(-1);
} /* writekeys */


	/* Change all key pointers that point to a record */

unknown's avatar
unknown committed
2623
/*
  Make the keys of a record point at a new record position.

  For every active key except prot_key:
   - keys flagged HA_NOSAME: the key is looked up and the record pointer
     stored on the key page is overwritten with newpos, after which the
     page is written back;
   - other keys: the key built for oldpos is deleted and a key built for
     newpos is written.

  RETURN
    0   ok
    -1  error
*/

int maria_movepoint(register MARIA_HA *info, uchar *record,
                    MARIA_RECORD_POS oldpos, MARIA_RECORD_POS newpos,
                    uint prot_key)
{
  MARIA_SHARE *share= info->s;
  /* Key buffer located max_key_length bytes into info->lastkey */
  uchar *key_buff= info->lastkey + share->base.max_key_length;
  uint keynr;
  DBUG_ENTER("maria_movepoint");

  for (keynr= 0; keynr < share->base.keys; keynr++)
  {
    MARIA_KEYDEF *keyinfo;
    uint key_length;

    if (keynr == prot_key ||
        !maria_is_key_active(share->state.key_map, keynr))
      continue;

    keyinfo= share->keyinfo + keynr;
    key_length= _ma_make_key(info, keynr, key_buff, record, oldpos);

    if (keyinfo->flag & HA_NOSAME)
    {
      /* Change pointer direct */
      uint nod_flag;

      if (_ma_search(info, keyinfo, key_buff, USE_WHOLE_KEY,
                     (uint) (SEARCH_SAME | SEARCH_SAVE_BUFF),
                     share->state.key_root[keynr]))
        DBUG_RETURN(-1);
      nod_flag= _ma_test_if_nod(share, info->buff);
      _ma_dpointer(info,
                   info->int_keypos - nod_flag - share->rec_reflength,
                   newpos);
      if (_ma_write_keypage(info, keyinfo, info->last_keypage,
                            PAGECACHE_LOCK_LEFT_UNLOCKED, DFLT_INIT_HITS,
                            info->buff))
        DBUG_RETURN(-1);
      continue;
    }

    /* Change old key to new */
    if (_ma_ck_delete(info, keynr, key_buff, key_length))
      DBUG_RETURN(-1);
    key_length= _ma_make_key(info, keynr, key_buff, record, newpos);
    if (_ma_ck_write(info, keynr, key_buff, key_length))
      DBUG_RETURN(-1);
  }
  DBUG_RETURN(0);
} /* maria_movepoint */


	/* Tell system that we want all memory for our cache */

/*
  Lock the current memory of the process when param->opt_maria_lock_memory
  is set. Only does something when compiled with SUN_OS; otherwise this is
  a no-op. A warning is printed if locking fails while running as root.
*/

void maria_lock_memory(HA_CHECK *param __attribute__((unused)))
{
#ifdef SUN_OS				/* Key caching thrashes on SunOS 4.1 */
  int lock_failed;

  if (!param->opt_maria_lock_memory)
    return;

  lock_failed= (mlockall(MCL_CURRENT) != 0);	/* or plock(DATLOCK); */
  /*
    NOTE(review): mlockall() reports its error through errno; my_errno may
    hold an unrelated value here - confirm.
  */
  if (lock_failed && geteuid() == 0)
    _ma_check_print_warning(param,
			     "Failed to lock memory. errno %d",my_errno);
#endif
} /* maria_lock_memory */


unknown's avatar
unknown committed
2686
/**
unknown's avatar
unknown committed
2687
   Flush all changed blocks to disk.
unknown's avatar
unknown committed
2688 2689

   We release blocks as it's unlikely that they would all be needed soon.
unknown's avatar
unknown committed
2690 2691
   This function needs to be called before swapping data or index files or
   syncing them.
2692

unknown's avatar
unknown committed
2693 2694 2695 2696
   @param  param           description of the repair operation
   @param  info            table
*/

unknown's avatar
unknown committed
2697 2698
static my_bool _ma_flush_table_files_before_swap(HA_CHECK *param,
                                                 MARIA_HA *info)
2699
{
unknown's avatar
unknown committed
2700
  DBUG_ENTER("_ma_flush_table_files_before_swap");
unknown's avatar
unknown committed
2701
  if (_ma_flush_table_files(info, MARIA_FLUSH_DATA | MARIA_FLUSH_INDEX,
unknown's avatar
unknown committed
2702
                            FLUSH_RELEASE, FLUSH_RELEASE))
2703
  {
unknown's avatar
unknown committed
2704 2705
    _ma_check_print_error(param, "%d when trying to write buffers", my_errno);
    DBUG_RETURN(TRUE);
2706
  }
unknown's avatar
unknown committed
2707 2708
  DBUG_RETURN(FALSE);
}
2709 2710 2711 2712


	/* Sort index for more efficient reads */

unknown's avatar
unknown committed
2713
/**
  @brief Rewrite the index file so that the pages of each index are stored
  in the order produced by sort_one_index()

  The index header is copied to a temporary file, every active index with
  a root page is copied into it with sort_one_index(), and the temporary
  file then replaces the index file. Nothing is done if the table has an
  R-tree index.

  @param param   check/repair options and state; temp_filename and
                 new_file_pos are used as work variables
  @param info    table handler
  @param name    table name, used for the progress message and to build
                 the temporary file name

  @return
    @retval 0   ok (also when skipped because of an R-tree index)
    @retval 1   protect_against_repair_crash() failed
    @retval -1  other error
*/

int maria_sort_index(HA_CHECK *param, register MARIA_HA *info, char *name)
{
  reg2 uint key;
  reg1 MARIA_KEYDEF *keyinfo;
  File new_file;
  my_off_t index_pos[HA_MAX_POSSIBLE_KEY];
  uint r_locks,w_locks;
  int old_lock;
  MARIA_SHARE *share= info->s;
  MARIA_STATE_INFO old_state;
  myf sync_dir= ((share->now_transactional && !share->temporary) ?
                 MY_SYNC_DIR : 0);
  DBUG_ENTER("maria_sort_index");

  /* cannot sort index files with R-tree indexes */
  for (key= 0,keyinfo= &share->keyinfo[0]; key < share->base.keys ;
       key++,keyinfo++)
    if (keyinfo->key_alg == HA_KEY_ALG_RTREE)
      DBUG_RETURN(0);

  if (!(param->testflag & T_SILENT))
    printf("- Sorting index for MARIA-table '%s'\n",name);

  if (protect_against_repair_crash(info, param, FALSE))
    DBUG_RETURN(1);

  /* Get real path for index file */
  fn_format(param->temp_filename,name,"", MARIA_NAME_IEXT,2+4+32);
  /* A descriptor of 0 is valid; only a negative value means failure */
  if ((new_file=my_create(fn_format(param->temp_filename,param->temp_filename,
                                    "", INDEX_TMP_EXT,2+4),
                          0,param->tmpfile_createflag,MYF(0))) < 0)
  {
    _ma_check_print_error(param,"Can't create new tempfile: '%s'",
                          param->temp_filename);
    DBUG_RETURN(-1);
  }
  if (maria_filecopy(param, new_file, share->kfile.file, 0L,
                     (ulong) share->base.keystart, "headerblock"))
    goto err;

  param->new_file_pos=share->base.keystart;
  for (key= 0,keyinfo= &share->keyinfo[0]; key < share->base.keys ;
       key++,keyinfo++)
  {
    if (! maria_is_key_active(share->state.key_map, key))
    {
      /*
        Nothing is copied for a disabled key, so it has no root in the new
        file. index_pos[] is copied to key_root[] for all keys below and
        must therefore be set for every key.
      */
      index_pos[key]= HA_OFFSET_ERROR;
      continue;
    }

    if (share->state.key_root[key] != HA_OFFSET_ERROR)
    {
      index_pos[key]=param->new_file_pos;       /* Write first block here */
      if (sort_one_index(param,info,keyinfo,share->state.key_root[key],
                         new_file))
        goto err;
    }
    else
      index_pos[key]= HA_OFFSET_ERROR;          /* No blocks */
  }

  /* Flush key cache for this file if we are calling this outside maria_chk */
  flush_pagecache_blocks(share->pagecache, &share->kfile,
                         FLUSH_IGNORE_CHANGED);

  share->state.version=(ulong) time((time_t*) 0);
  old_state= share->state;                      /* save state if not stored */
  r_locks=   share->r_locks;
  w_locks=   share->w_locks;
  old_lock=  info->lock_type;

  /* Put same locks as old file */
  share->r_locks= share->w_locks= share->tot_locks= 0;
  (void) _ma_writeinfo(info,WRITEINFO_UPDATE_KEYFILE);
  pthread_mutex_lock(&share->intern_lock);
  VOID(my_close(share->kfile.file, MYF(MY_WME)));
  share->kfile.file = -1;
  pthread_mutex_unlock(&share->intern_lock);
  VOID(my_close(new_file,MYF(MY_WME)));
  if (maria_change_to_newfile(share->index_file_name, MARIA_NAME_IEXT,
                              INDEX_TMP_EXT, sync_dir) ||
      _ma_open_keyfile(share))
    goto err2;
  info->lock_type= F_UNLCK;                     /* Force maria_readinfo to lock */
  _ma_readinfo(info,F_WRLCK,0);                 /* Will lock the table */
  info->lock_type=  old_lock;
  share->r_locks=   r_locks;
  share->w_locks=   w_locks;
  share->tot_locks= r_locks+w_locks;
  share->state=     old_state;                  /* Restore old state */

  info->state->key_file_length=param->new_file_pos;
  info->update= (short) (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED);
  for (key=0 ; key < share->base.keys ; key++)
    share->state.key_root[key]=index_pos[key];
  share->state.key_del=  HA_OFFSET_ERROR;

  share->state.changed&= ~STATE_NOT_SORTED_PAGES;
  DBUG_EXECUTE_IF("maria_flush_whole_log",
                  {
                    DBUG_PRINT("maria_flush_whole_log", ("now"));
                    translog_flush(translog_get_horizon());
                  });
  DBUG_EXECUTE_IF("maria_crash_sort_index",
                  {
                    DBUG_PRINT("maria_crash_sort_index", ("now"));
                    fflush(DBUG_FILE);
                    abort();
                  });
  DBUG_RETURN(0);

err:
  VOID(my_close(new_file,MYF(MY_WME)));
err2:
  VOID(my_delete(param->temp_filename,MYF(MY_WME)));
  DBUG_RETURN(-1);
} /* maria_sort_index */


unknown's avatar
unknown committed
2829 2830 2831 2832 2833 2834 2835 2836 2837 2838
/**
  @brief put CRC on the page

  @param buff            reference on the page buffer.
  @param pos             position of the page in the file.
  @param share           table share; its block_size converts pos into a
                         page number and the share itself is handed on to
                         maria_page_crc_set_index()
*/

static void put_crc(char *buff, my_off_t pos, MARIA_SHARE *share)
{
  pgcache_page_no_t page_no= (pgcache_page_no_t) (pos / share->block_size);

  maria_page_crc_set_index(buff, page_no, (uchar*) share);
}


2844 2845
	 /* Sort records recursive using one index */

unknown's avatar
unknown committed
2846 2847
static int sort_one_index(HA_CHECK *param, MARIA_HA *info,
                          MARIA_KEYDEF *keyinfo,
2848 2849 2850
			  my_off_t pagepos, File new_file)
{
  uint length,nod_flag,used_length, key_length;
unknown's avatar
unknown committed
2851 2852
  uchar *buff,*keypos,*endpos;
  uchar key[HA_MAX_POSSIBLE_KEY_BUFF];
2853
  my_off_t new_page_pos,next_page;
2854
  MARIA_SHARE *share= info->s;
2855 2856
  DBUG_ENTER("sort_one_index");

2857 2858
  /* cannot walk over R-tree indices */
  DBUG_ASSERT(keyinfo->key_alg != HA_KEY_ALG_RTREE);
2859 2860 2861
  new_page_pos=param->new_file_pos;
  param->new_file_pos+=keyinfo->block_length;

unknown's avatar
unknown committed
2862
  if (!(buff= (uchar*) my_alloca((uint) keyinfo->block_length)))
2863 2864 2865 2866
  {
    _ma_check_print_error(param,"Not enough memory for key block");
    DBUG_RETURN(-1);
  }
2867 2868
  if (!_ma_fetch_keypage(info, keyinfo, pagepos,PAGECACHE_LOCK_LEFT_UNLOCKED,
                         DFLT_INIT_HITS, buff, 0, 0))
2869
  {
unknown's avatar
unknown committed
2870
    report_keypage_fault(param, info, pagepos);
2871 2872
    goto err;
  }
2873
  if ((nod_flag=_ma_test_if_nod(share, buff)) || keyinfo->flag & HA_FULLTEXT)
2874
  {
2875 2876
    used_length= _ma_get_page_used(share, buff);
    keypos=buff + share->keypage_header + nod_flag;
2877
    endpos=buff + used_length;
2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890
    for ( ;; )
    {
      if (nod_flag)
      {
	next_page= _ma_kpos(nod_flag,keypos);
        /* Save new pos */
	_ma_kpointer(info,keypos-nod_flag,param->new_file_pos);
	if (sort_one_index(param,info,keyinfo,next_page,new_file))
	{
	  DBUG_PRINT("error",
		     ("From page: %ld, keyoffset: %lu  used_length: %d",
		      (ulong) pagepos, (ulong) (keypos - buff),
		      (int) used_length));
unknown's avatar
unknown committed
2891
	  DBUG_DUMP("buff",(uchar*) buff,used_length);
2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907
	  goto err;
	}
      }
      if (keypos >= endpos ||
	  (key_length=(*keyinfo->get_key)(keyinfo,nod_flag,&keypos,key)) == 0)
	break;
      DBUG_ASSERT(keypos <= endpos);
      if (keyinfo->flag & HA_FULLTEXT)
      {
        uint off;
        int  subkeys;
        get_key_full_length_rdonly(off, key);
        subkeys=ft_sintXkorr(key+off);
        if (subkeys < 0)
        {
          next_page= _ma_dpos(info,0,key+key_length);
2908
          _ma_dpointer(info,keypos-nod_flag-share->rec_reflength,
2909
                       param->new_file_pos); /* Save new pos */
2910
          if (sort_one_index(param,info,&share->ft2_keyinfo,
2911 2912 2913 2914 2915 2916 2917 2918
                             next_page,new_file))
            goto err;
        }
      }
    }
  }

  /* Fill block with zero and write it to the new index file */
2919
  length= _ma_get_page_used(share, buff);
unknown's avatar
unknown committed
2920
  bzero((uchar*) buff+length,keyinfo->block_length-length);
unknown's avatar
unknown committed
2921
  put_crc(buff, new_page_pos, share);
unknown's avatar
unknown committed
2922
  if (my_pwrite(new_file,(uchar*) buff,(uint) keyinfo->block_length,
2923 2924 2925 2926 2927
		new_page_pos,MYF(MY_NABP | MY_WAIT_IF_FULL)))
  {
    _ma_check_print_error(param,"Can't write indexblock, error: %d",my_errno);
    goto err;
  }
unknown's avatar
unknown committed
2928
  my_afree((uchar*) buff);
2929 2930
  DBUG_RETURN(0);
err:
unknown's avatar
unknown committed
2931
  my_afree((uchar*) buff);
2932 2933 2934 2935
  DBUG_RETURN(1);
} /* sort_one_index */


unknown's avatar
unknown committed
2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950
/**
   @brief Fill empty space in index file with zeroes

   @return
   @retval 0  Ok
   @retval 1  Error
*/

/*
  Walks every page of the index file from share->base.keystart up to
  key_file_length through the page cache, clears the LSN bytes at the start
  of the page for tables born transactional, zeroes the tail of the page
  and finally forces the changed pages to disk.
*/

static my_bool maria_zerofill_index(HA_CHECK *param, MARIA_HA *info,
                                    const char *name)
{
  MARIA_SHARE *share= info->s;
  MARIA_PINNED_PAGE page_link;
  char llbuff[21];
  uchar *page_buff;
  pgcache_page_no_t page_no;
  my_off_t offset;
  my_off_t index_end= share->state.state.key_file_length;
  uint block_size= share->block_size;
  my_bool clear_lsn= share->base.born_transactional;
  DBUG_ENTER("maria_zerofill_index");

  if (!(param->testflag & T_SILENT))
    printf("- Zerofilling index for MARIA-table '%s'\n",name);

  /* Go through the index file */
  offset= share->base.keystart;
  page_no= (ulonglong) (offset / block_size);
  while (offset < index_end)
  {
    uint used;

    page_buff= pagecache_read(share->pagecache,
                              &share->kfile, page_no,
                              DFLT_INIT_HITS, 0,
                              PAGECACHE_PLAIN_PAGE, PAGECACHE_LOCK_WRITE,
                              &page_link.link);
    if (!page_buff)
    {
      pagecache_unlock_by_link(share->pagecache, page_link.link,
                               PAGECACHE_LOCK_WRITE_UNLOCK,
                               PAGECACHE_UNPIN, LSN_IMPOSSIBLE,
                               LSN_IMPOSSIBLE, 0);
      _ma_check_print_error(param,
                            "Page %9s: Got error %d when reading index file",
                            llstr(offset, llbuff), my_errno);
      DBUG_RETURN(1);
    }
    if (clear_lsn)
      bzero(page_buff, LSN_SIZE);
    used= _ma_get_page_used(share, page_buff);
    /*
      Skip malformed blocks.
      NOTE(review): sort_one_index() uses _ma_get_page_used() as an offset
      from the start of the page, while here keypage_header is added on top
      of it, so the first keypage_header bytes after the used area are left
      untouched - confirm which is intended.
    */
    DBUG_ASSERT(used + share->keypage_header <= block_size);
    if (used + share->keypage_header < block_size)
      bzero(page_buff + share->keypage_header + used,
            block_size - used - share->keypage_header);
    /* Last argument 1: the page was changed */
    pagecache_unlock_by_link(share->pagecache, page_link.link,
                             PAGECACHE_LOCK_WRITE_UNLOCK,
                             PAGECACHE_UNPIN, LSN_IMPOSSIBLE,
                             LSN_IMPOSSIBLE, 1);
    offset+= block_size;
    page_no++;
  }
  DBUG_RETURN(flush_pagecache_blocks(share->pagecache, &share->kfile,
                                     FLUSH_FORCE_WRITE) ? 1 : 0);
}


/**
   @brief Fill empty space in data file with zeroes

   @todo
   Zerofill all pages marked in bitmap as empty and change them to
   be of type UNALLOCATED_PAGE

   @return
   @retval 0  Ok
   @retval 1  Error
*/

static my_bool maria_zerofill_data(HA_CHECK *param, MARIA_HA *info,
                                   const char *name)
{
  MARIA_SHARE *share= info->s;
  MARIA_PINNED_PAGE page_link;
  char llbuff[21];
  my_off_t pos;
3021
  pgcache_page_no_t page;
unknown's avatar
unknown committed
3022
  uint block_size= share->block_size;
unknown's avatar
unknown committed
3023
  MARIA_FILE_BITMAP *bitmap= &share->bitmap;
unknown's avatar
unknown committed
3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060
  DBUG_ENTER("maria_zerofill_data");

  /* This works only with BLOCK_RECORD files */
  if (share->data_file_type != BLOCK_RECORD)
    DBUG_RETURN(0);

  if (!(param->testflag & T_SILENT))
    printf("- Zerofilling data  for MARIA-table '%s'\n",name);

  /* Go through the record file */
  for (page= 1, pos= block_size;
       pos < info->state->data_file_length;
       pos+= block_size, page++)
  {
    uchar *buff;
    uint page_type;

    /* Ignore bitmap pages */
    if ((page % share->bitmap.pages_covered) == 0)
      continue;
    if (!(buff= pagecache_read(share->pagecache,
                               &info->dfile,
                               page, 1, 0,
                               PAGECACHE_PLAIN_PAGE, PAGECACHE_LOCK_WRITE,
                               &page_link.link)))
    {
      _ma_check_print_error(param,
                            "Page %9s:  Got error: %d when reading datafile",
                            llstr(pos, llbuff), my_errno);
      goto err;
    }
    page_type= buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK;
    switch ((enum en_page_type) page_type) {
    case UNALLOCATED_PAGE:
      bzero(buff, block_size);
      break;
    case BLOB_PAGE:
unknown's avatar
unknown committed
3061 3062 3063 3064 3065 3066 3067
      if (_ma_bitmap_get_page_bits(info, bitmap, page) == 0)
      {
        /* Unallocated page */
        bzero(buff, block_size);
      }
      else
        bzero(buff, LSN_SIZE);
unknown's avatar
unknown committed
3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084
      break;
    case HEAD_PAGE:
    case TAIL_PAGE:
    {
      uint max_entry= (uint) buff[DIR_COUNT_OFFSET];
      uint offset, dir_start;
      uchar *dir;

      bzero(buff, LSN_SIZE);
      if (max_entry != 0)
      {
        dir= dir_entry_pos(buff, block_size, max_entry - 1);
        _ma_compact_block_page(buff, block_size, max_entry -1, 0);

        /* Zerofille the not used part */
        offset= uint2korr(dir) + uint2korr(dir+2);
        dir_start= (uint) (dir - buff);
3085
        DBUG_ASSERT(dir_start >= offset);
unknown's avatar
unknown committed
3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125
        if (dir_start > offset)
          bzero(buff + offset, dir_start - offset);
      }
      break;
    }
    default:
      _ma_check_print_error(param,
                            "Page %9s:  Found unrecognizable block of type %d",
                            llstr(pos, llbuff), page_type);
      goto err;
    }
    pagecache_unlock_by_link(share->pagecache, page_link.link,
                             PAGECACHE_LOCK_WRITE_UNLOCK,
                             PAGECACHE_UNPIN, LSN_IMPOSSIBLE,
                             LSN_IMPOSSIBLE, 1);
  }
  if (flush_pagecache_blocks(share->pagecache, &info->dfile,
                             FLUSH_FORCE_WRITE))
    DBUG_RETURN(1);
  DBUG_RETURN(0);

err:
  pagecache_unlock_by_link(share->pagecache, page_link.link,
                           PAGECACHE_LOCK_WRITE_UNLOCK,
                           PAGECACHE_UNPIN, LSN_IMPOSSIBLE,
                           LSN_IMPOSSIBLE, 0);
  DBUG_RETURN(1);
}


/**
   @brief Fill empty space in index and data files with zeroes

   Runs maria_zerofill_index(), maria_zerofill_data() and _ma_set_uuid()
   in that order, stopping at the first one that fails. If the table is
   transactional, logging is disabled for the table while this runs and
   re-enabled afterwards. On success the STATE_NOT_ZEROFILLED,
   STATE_NOT_MOVABLE and STATE_MOVED bits are cleared from the share state.

   @param  param  Check parameters
   @param  info   Maria handler
   @param  name   Table name (for messages)

   @return
   @retval 0  Ok
   @retval 1  Error
*/

int maria_zerofill(HA_CHECK *param, MARIA_HA *info, const char *name)
{
  MARIA_SHARE *share= info->s;
  my_bool was_transactional= share->now_transactional;
  my_bool failed;
  DBUG_ENTER("maria_zerofill");

  if (was_transactional)
    _ma_tmp_disable_logging_for_table(info, 0);

  failed= (maria_zerofill_index(param, info, name) ||
           maria_zerofill_data(param, info, name) ||
           _ma_set_uuid(info, 0));
  if (!failed)
  {
    /* The table is now zerofilled and movable; record that in the state */
    share->state.changed&= ~(STATE_NOT_ZEROFILLED | STATE_NOT_MOVABLE |
                             STATE_MOVED);
    /* Ensure the state is flushed to disk */
    info->update= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED);
  }

  if (was_transactional)
    _ma_reenable_logging_for_table(info, FALSE);
  DBUG_RETURN(failed);
}


/*
  Let temporary file replace old file.
  This assumes that the new file was created in the same
  directory as given by realpath(filename).
  This will ensure that any symlinks that are used will still work.
  Copy stats from old file to new file, deletes orignal and
  changes new file name to old file name
*/
3157 3158

int maria_change_to_newfile(const char * filename, const char * old_ext,
unknown's avatar
unknown committed
3159
                            const char * new_ext, myf MyFlags)
3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176
{
  char old_filename[FN_REFLEN],new_filename[FN_REFLEN];
#ifdef USE_RAID
  if (raid_chunks)
    return my_raid_redel(fn_format(old_filename,filename,"",old_ext,2+4),
			 fn_format(new_filename,filename,"",new_ext,2+4),
			 raid_chunks,
			 MYF(MY_WME | MY_LINK_WARNING | MyFlags));
#endif
  /* Get real path to filename */
  (void) fn_format(old_filename,filename,"",old_ext,2+4+32);
  return my_redel(old_filename,
		  fn_format(new_filename,old_filename,"",new_ext,2+4),
		  MYF(MY_WME | MY_LINK_WARNING | MyFlags));
} /* maria_change_to_newfile */


3177
/* Copy a block between two files */
3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194

int maria_filecopy(HA_CHECK *param, File to,File from,my_off_t start,
	     my_off_t length, const char *type)
{
  char tmp_buff[IO_SIZE],*buff;
  ulong buff_length;
  DBUG_ENTER("maria_filecopy");

  buff_length=(ulong) min(param->write_buffer_length,length);
  if (!(buff=my_malloc(buff_length,MYF(0))))
  {
    buff=tmp_buff; buff_length=IO_SIZE;
  }

  VOID(my_seek(from,start,MY_SEEK_SET,MYF(0)));
  while (length > buff_length)
  {
unknown's avatar
unknown committed
3195 3196
    if (my_read(from,(uchar*) buff,buff_length,MYF(MY_NABP)) ||
	my_write(to,(uchar*) buff,buff_length,param->myf_rw))
3197 3198 3199
      goto err;
    length-= buff_length;
  }
unknown's avatar
unknown committed
3200 3201
  if (my_read(from,(uchar*) buff,(uint) length,MYF(MY_NABP)) ||
      my_write(to,(uchar*) buff,(uint) length,param->myf_rw))
3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230
    goto err;
  if (buff != tmp_buff)
    my_free(buff,MYF(0));
  DBUG_RETURN(0);
err:
  if (buff != tmp_buff)
    my_free(buff,MYF(0));
  _ma_check_print_error(param,"Can't copy %s to tempfile, error %d",
		       type,my_errno);
  DBUG_RETURN(1);
}


/*
  Repair table or given index using sorting

  SYNOPSIS
    maria_repair_by_sort()
    param		Repair parameters
    info		MARIA handler to repair
    name		Name of table (for warnings)
    rep_quick		set to <> 0 if we should not change data file

  DESCRIPTION
    If rep_quick is not set, a temporary data file is created and, after
    the first processed index has been built, it replaces the original
    data file (see maria_change_to_newfile()).
    The indexes selected by key_map are then rebuilt one by one with
    _ma_create_index_by_sort(). With T_CREATE_MISSING_KEYS only the
    keys that are currently not active are built; otherwise all key
    roots are reset first.

    On any failure the temporary file (if still open) is closed and
    deleted and the table is marked as crashed on repair.

  RESULT
    0	ok
    <>0	Error
*/

int maria_repair_by_sort(HA_CHECK *param, register MARIA_HA *info,
                         const char * name, my_bool rep_quick)
{
  int got_error;
  uint i;
  ha_rows start_records;
  my_off_t new_header_length, org_header_length, del;
  File new_file;
  MARIA_SORT_PARAM sort_param;
  MARIA_SHARE *share= info->s;
  HA_KEYSEG *keyseg;
  double  *rec_per_key_part;
  char llbuff[22];
  MARIA_SORT_INFO sort_info;
  ulonglong key_map= share->state.key_map;
  myf sync_dir= ((share->now_transactional && !share->temporary) ?
                 MY_SYNC_DIR : 0);
  my_bool scan_inited= 0;
  DBUG_ENTER("maria_repair_by_sort");

  got_error= 1;
  new_file= -1;
  start_records= info->state->records;
  if (!(param->testflag & T_SILENT))
  {
    printf("- recovering (with sort) MARIA-table '%s'\n",name);
    printf("Data records: %s\n", llstr(start_records,llbuff));
  }

  if (initialize_variables_for_repair(param, &sort_info, &sort_param, info,
                                      rep_quick))
    goto err;

  /* When unpacking, the new data file gets no pack header */
  org_header_length= share->pack.header_length;
  new_header_length= (param->testflag & T_UNPACK) ? 0 : org_header_length;
  sort_param.filepos= new_header_length;

  if (!rep_quick)
  {
    /* Get real path for data file */
    if ((new_file=my_create(fn_format(param->temp_filename,
                                      share->data_file_name, "",
                                      DATA_TMP_EXT, 2+4),
                            0,param->tmpfile_createflag,
                            MYF(0))) < 0)
    {
      _ma_check_print_error(param,"Can't create new tempfile: '%s'",
                            param->temp_filename);
      goto err;
    }
    if (new_header_length &&
        maria_filecopy(param, new_file, info->dfile.file, 0L,
                       new_header_length, "datafile-header"))
      goto err;

    share->state.dellink= HA_OFFSET_ERROR;
    info->rec_cache.file= new_file;             /* For sort_delete_record */
    if (share->data_file_type == BLOCK_RECORD ||
        (param->testflag & T_UNPACK))
    {
      if (create_new_data_handle(&sort_param, new_file))
        goto err;
      sort_info.new_info->rec_cache.file= new_file;
    }
  }

  if (!(sort_info.key_block=
        alloc_key_blocks(param,
                         (uint) param->sort_key_blocks,
                         share->base.max_key_block_length)))
    goto err;
  sort_info.key_block_end=sort_info.key_block+param->sort_key_blocks;

  if (share->data_file_type != BLOCK_RECORD)
  {
    /* We need a read buffer to read rows in big blocks */
    if (init_io_cache(&param->read_cache, info->dfile.file,
                      (uint) param->read_buffer_length,
                      READ_CACHE, org_header_length, 1, MYF(MY_WME)))
      goto err;
  }
  if (sort_info.new_info->s->data_file_type != BLOCK_RECORD)
  {
    /* When writing to not block records, we need a write buffer */
    if (!rep_quick)
    {
      if (init_io_cache(&sort_info.new_info->rec_cache, new_file,
                        (uint) param->write_buffer_length,
                        WRITE_CACHE, new_header_length, 1,
                        MYF(MY_WME | MY_WAIT_IF_FULL) & param->myf_rw))
        goto err;
      sort_info.new_info->opt_flag|= WRITE_CACHE_USED;
    }
  }

  if (!(sort_param.record=(uchar*) my_malloc((uint) share->base.pack_reclength,
                                             MYF(0))) ||
      _ma_alloc_buffer(&sort_param.rec_buff, &sort_param.rec_buff_size,
                       share->base.default_rec_buff_size))
  {
    _ma_check_print_error(param, "Not enough memory for extra record");
    goto err;
  }

  if (!(param->testflag & T_CREATE_MISSING_KEYS))
  {
    /* Clear the pointers to the given rows */
    for (i=0 ; i < share->base.keys ; i++)
      share->state.key_root[i]= HA_OFFSET_ERROR;
    share->state.key_del= HA_OFFSET_ERROR;
    info->state->key_file_length=share->base.keystart;
  }
  else
    key_map= ~key_map;				/* Create the missing keys */

  param->read_cache.end_of_file= sort_info.filelength;
  sort_param.wordlist=NULL;
  init_alloc_root(&sort_param.wordroot, FTPARSER_MEMROOT_ALLOC_SIZE, 0);

  sort_param.key_cmp=sort_key_cmp;
  sort_param.lock_in_memory=maria_lock_memory;
  sort_param.tmpdir=param->tmpdir;
  sort_param.master =1;

  /* Remembered for the quick-repair consistency check after the loop */
  del=info->state->del;

  rec_per_key_part= param->new_rec_per_key_part;
  for (sort_param.key=0 ; sort_param.key < share->base.keys ;
       rec_per_key_part+=sort_param.keyinfo->keysegs, sort_param.key++)
  {
    sort_param.keyinfo=share->keyinfo+sort_param.key;
    if (! maria_is_key_active(key_map, sort_param.key))
    {
      /* Remember old statistics for key */
      memcpy((char*) rec_per_key_part,
             (char*) (share->state.rec_per_key_part +
                      (uint) (rec_per_key_part - param->new_rec_per_key_part)),
             sort_param.keyinfo->keysegs*sizeof(*rec_per_key_part));
      continue;
    }

    if ((!(param->testflag & T_SILENT)))
      printf ("- Fixing index %d\n",sort_param.key+1);

    sort_param.read_cache=param->read_cache;
    sort_param.seg=sort_param.keyinfo->seg;
    sort_param.max_pos= sort_param.pos= org_header_length;
    keyseg=sort_param.seg;
    bzero((char*) sort_param.unique,sizeof(sort_param.unique));

    /* Sum up the key length used for sorting over all key segments */
    sort_param.key_length=share->rec_reflength;
    for (i=0 ; keyseg[i].type != HA_KEYTYPE_END; i++)
    {
      sort_param.key_length+=keyseg[i].length;
      if (keyseg[i].flag & HA_SPACE_PACK)
        sort_param.key_length+=get_pack_length(keyseg[i].length);
      if (keyseg[i].flag & (HA_BLOB_PART | HA_VAR_LENGTH_PART))
        sort_param.key_length+=2 + test(keyseg[i].length >= 127);
      if (keyseg[i].flag & HA_NULL_PART)
        sort_param.key_length++;
    }
    info->state->records=info->state->del=share->state.split=0;
    info->state->empty=0;

    if (sort_param.keyinfo->flag & HA_FULLTEXT)
    {
      uint ft_max_word_len_for_sort=FT_MAX_WORD_LEN_FOR_SORT*
                                    sort_param.keyinfo->seg->charset->mbmaxlen;
      sort_param.key_length+=ft_max_word_len_for_sort-HA_FT_MAXBYTELEN;
      /*
        fulltext indexes may have much more entries than the
        number of rows in the table. We estimate the number here.

        Note, built-in parser is always nr. 0 - see ftparser_call_initializer()
      */
      if (sort_param.keyinfo->ftparser_nr == 0)
      {
        /*
          for built-in parser the number of generated index entries
          cannot be larger than the size of the data file divided
          by the minimal word's length
        */
        sort_info.max_records=
          (ha_rows) (sort_info.filelength/ft_min_word_len+1);
      }
      else
      {
        /*
          for external plugin parser we cannot tell anything at all :(
          so, we'll use all the sort memory and start from ~10 buffpeks.
          (see _ma_create_index_by_sort)
        */
        sort_info.max_records=
          10*param->sort_buffer_length/sort_param.key_length;
      }

      sort_param.key_read=  sort_maria_ft_key_read;
      sort_param.key_write= sort_maria_ft_key_write;
    }
    else
    {
      sort_param.key_read=  sort_key_read;
      sort_param.key_write= sort_key_write;
    }

    if (sort_info.new_info->s->data_file_type == BLOCK_RECORD)
    {
      scan_inited= 1;
      if (maria_scan_init(sort_info.info))
        goto err;
    }
    if (_ma_create_index_by_sort(&sort_param,
                                 (my_bool) (!(param->testflag & T_VERBOSE)),
                                 (size_t) param->sort_buffer_length))
    {
      param->retry_repair=1;
      _ma_check_print_error(param, "Create index by sort failed");
      goto err;
    }
    DBUG_EXECUTE_IF("maria_flush_whole_log",
                    {
                      DBUG_PRINT("maria_flush_whole_log", ("now"));
                      translog_flush(translog_get_horizon());
                    });
    DBUG_EXECUTE_IF("maria_crash_create_index_by_sort",
                    {
                      DBUG_PRINT("maria_crash_create_index_by_sort", ("now"));
                      fflush(DBUG_FILE);
                      abort();
                    });
    if (scan_inited)
    {
      scan_inited= 0;
      maria_scan_end(sort_info.info);
    }

    /* No need to calculate checksum again. */
    sort_param.calc_checksum= 0;
    free_root(&sort_param.wordroot, MYF(0));

    /* Set for next loop */
    sort_info.max_records= (ha_rows) sort_info.new_info->state->records;

    if (param->testflag & T_STATISTICS)
      maria_update_key_parts(sort_param.keyinfo, rec_per_key_part,
                             sort_param.unique,
                             (param->stats_method ==
                              MI_STATS_METHOD_IGNORE_NULLS ?
                              sort_param.notnull : NULL),
                             (ulonglong) info->state->records);
    maria_set_key_active(share->state.key_map, sort_param.key);

    if (_ma_flush_table_files_before_swap(param, info))
      goto err;

    if (sort_param.fix_datafile)
    {
      /*
        The data file was rewritten while building this index: finish the
        new file and let it replace the old one. fix_datafile is cleared
        at the end of this branch, so this happens at most once.
      */
      param->read_cache.end_of_file=sort_param.filepos;
      if (maria_write_data_suffix(&sort_info,1) ||
          end_io_cache(&sort_info.new_info->rec_cache))
      {
        _ma_check_print_error(param, "Got error when flushing row cache");
        goto err;
      }
      sort_info.new_info->opt_flag&= ~WRITE_CACHE_USED;

      if (param->testflag & T_SAFE_REPAIR)
      {
        /* Don't repair if we lost more than one row */
        if (info->state->records+1 < start_records)
        {
          _ma_check_print_error(param,
                                "Rows lost; Aborting because safe repair was "
                                "requested");
          info->state->records=start_records;
          goto err;
        }
      }

      sort_info.new_info->state->data_file_length= sort_param.filepos;
      if (sort_info.new_info != sort_info.info)
      {
        /* Keep the state of the new handler; it is needed after the close */
        MARIA_STATE_INFO save_state= sort_info.new_info->s->state;
        if (maria_close(sort_info.new_info))
        {
          _ma_check_print_error(param, "Got error %d on close", my_errno);
          goto err;
        }
        copy_data_file_state(&share->state, &save_state);
        new_file= -1;
        sort_info.new_info= info;
        info->rec_cache.file= info->dfile.file;
      }

      share->state.version=(ulong) time((time_t*) 0);	/* Force reopen */

      /* Replace the actual file with the temporary file */
      if (new_file >= 0)
      {
        my_close(new_file, MYF(MY_WME));
        new_file= -1;
      }
      change_data_file_descriptor(info, -1);
      if (maria_change_to_newfile(share->data_file_name,MARIA_NAME_DEXT,
                                  DATA_TMP_EXT,
                                  (param->testflag & T_BACKUP_DATA ?
                                   MYF(MY_REDEL_MAKE_BACKUP): MYF(0)) |
                                  sync_dir) ||
          _ma_open_datafile(info, share, -1))
      {
        _ma_check_print_error(param, "Couldn't change to new data file");
        goto err;
      }
      if (param->testflag & T_UNPACK)
        restore_data_file_type(share);

      /* The remaining indexes are built from the new data file */
      org_header_length= share->pack.header_length;
      sort_info.org_data_file_type= share->data_file_type;
      sort_info.filelength= info->state->data_file_length;
      sort_param.fix_datafile=0;
    }
    else
      info->state->data_file_length=sort_param.max_pos;

    param->read_cache.file= info->dfile.file;	/* re-init read cache */
    reinit_io_cache(&param->read_cache,READ_CACHE,share->pack.header_length,
                    1,1);
  }

  if (param->testflag & T_WRITE_LOOP)
  {
    VOID(fputs("          \r",stdout)); VOID(fflush(stdout));
  }

  if (rep_quick && del+sort_info.dupp != info->state->del)
  {
    _ma_check_print_error(param,"Couldn't fix table with quick recovery: Found wrong number of deleted records");
    _ma_check_print_error(param,"Run recovery again without -q");
    got_error=1;
    param->retry_repair=1;
    param->testflag|=T_RETRY_WITHOUT_QUICK;
    goto err;
  }

  if (rep_quick && (param->testflag & T_FORCE_UNIQUENESS))
  {
    /*
      Wanted data file size: the data length, plus the memory map margin
      for compressed tables. The conditional must be parenthesized as a
      whole; '+' binds tighter than '?:', so without the parentheses the
      sum would become the condition and skr would be only
      MEMMAP_EXTRA_MARGIN or 0, which my_chsize() below would then apply
      to the data file.
    */
    my_off_t skr= (info->state->data_file_length +
                   ((sort_info.org_data_file_type == COMPRESSED_RECORD) ?
                    MEMMAP_EXTRA_MARGIN : 0));
#ifdef USE_RELOC
    if (sort_info.org_data_file_type == STATIC_RECORD &&
        skr < share->base.reloc*share->base.min_pack_length)
      skr=share->base.reloc*share->base.min_pack_length;
#endif
    if (skr != sort_info.filelength)
      if (my_chsize(info->dfile.file, skr, 0, MYF(0)))
        _ma_check_print_warning(param,
                                "Can't change size of datafile,  error: %d",
                                my_errno);
  }

  if (param->testflag & T_CALC_CHECKSUM)
    info->state->checksum=param->glob_crc;

  if (my_chsize(share->kfile.file, info->state->key_file_length, 0, MYF(0)))
    _ma_check_print_warning(param,
                            "Can't change size of indexfile, error: %d",
                            my_errno);

  if (!(param->testflag & T_SILENT))
  {
    if (start_records != info->state->records)
      printf("Data records: %s\n", llstr(info->state->records,llbuff));
  }
  if (sort_info.dupp)
    _ma_check_print_warning(param,
                            "%s records have been removed",
                            llstr(sort_info.dupp,llbuff));
  got_error=0;

  if (&share->state.state != info->state)
    memcpy(&share->state.state, info->state, sizeof(*info->state));

err:
  /* Common cleanup; also reached on success with got_error == 0 */
  if (scan_inited)
    maria_scan_end(sort_info.info);

  VOID(end_io_cache(&sort_info.new_info->rec_cache));
  VOID(end_io_cache(&param->read_cache));
  info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED);
  sort_info.new_info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED);
  if (got_error)
  {
    if (! param->error_printed)
      _ma_check_print_error(param,"%d when fixing table",my_errno);
    (void)_ma_flush_table_files_before_swap(param, info);
    if (sort_info.new_info && sort_info.new_info != sort_info.info)
    {
      unuse_data_file_descriptor(sort_info.new_info);
      maria_close(sort_info.new_info);
    }
    if (new_file >= 0)
    {
      /* Remove the unfinished temporary data file */
      VOID(my_close(new_file,MYF(0)));
      VOID(my_delete(param->temp_filename, MYF(MY_WME)));
    }
    maria_mark_crashed_on_repair(info);
  }
  else
  {
    if (key_map == share->state.key_map)
      share->state.changed&= ~STATE_NOT_OPTIMIZED_KEYS;
    /*
      Now that we have flushed and forced everything, we can bump
      create_rename_lsn:
    */
    DBUG_EXECUTE_IF("maria_flush_whole_log",
                    {
                      DBUG_PRINT("maria_flush_whole_log", ("now"));
                      translog_flush(translog_get_horizon());
                    });
    DBUG_EXECUTE_IF("maria_crash_repair",
                    {
                      DBUG_PRINT("maria_crash_repair", ("now"));
                      fflush(DBUG_FILE);
                      abort();
                    });
  }
  share->state.changed|= STATE_NOT_SORTED_PAGES;
  if (!rep_quick)
    share->state.changed&= ~(STATE_NOT_OPTIMIZED_ROWS | STATE_NOT_ZEROFILLED |
                             STATE_NOT_MOVABLE);

  my_free(sort_param.rec_buff, MYF(MY_ALLOW_ZERO_PTR));
  my_free(sort_param.record,MYF(MY_ALLOW_ZERO_PTR));
  my_free((uchar*) sort_info.key_block,MYF(MY_ALLOW_ZERO_PTR));
  my_free((uchar*) sort_info.ft_buf, MYF(MY_ALLOW_ZERO_PTR));
  my_free(sort_info.buff,MYF(MY_ALLOW_ZERO_PTR));
  DBUG_RETURN(got_error);
}

unknown's avatar
unknown committed
3670

3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685
/*
  Threaded repair of table using sorting

  SYNOPSIS
    maria_repair_parallel()
    param		Repair parameters
    info		MARIA handler to repair
    name		Name of table (for warnings)
    rep_quick		set to <> 0 if we should not change data file

  DESCRIPTION
    Same as maria_repair_by_sort but do it multithreaded
    Each key is handled by a separate thread.
    TODO: make the number of threads a parameter

3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707
    In parallel repair we use one thread per index. There are two modes:

    Quick

      Only the indexes are rebuilt. All threads share a read buffer.
      Every thread that needs fresh data in the buffer enters the shared
      cache lock. The last thread joining the lock reads the buffer from
      the data file and wakes all other threads.

    Non-quick

      The data file is rebuilt and all indexes are rebuilt to point to
      the new record positions. One thread is the master thread. It
      reads from the old data file and writes to the new data file. It
      also creates one of the indexes. The other threads read from a
      buffer which is filled by the master. If they need fresh data,
      they enter the shared cache lock. If the master's write buffer is
      full, it flushes it to the new data file and enters the shared
      cache lock too. When all threads joined in the lock, the master
      copies its write buffer to the read buffer for the other threads
      and wakes them.

3708 3709 3710 3711 3712 3713
  RESULT
    0	ok
    <>0	Error
*/

int maria_repair_parallel(HA_CHECK *param, register MARIA_HA *info,
3714
			const char * name, my_bool rep_quick)
3715 3716 3717 3718 3719 3720 3721 3722 3723
{
#ifndef THREAD
  return maria_repair_by_sort(param, info, name, rep_quick);
#else
  int got_error;
  uint i,key, total_key_length, istep;
  ha_rows start_records;
  my_off_t new_header_length,del;
  File new_file;
3724
  MARIA_SORT_PARAM *sort_param=0, tmp_sort_param;
3725
  MARIA_SHARE *share= info->s;
3726
  double  *rec_per_key_part;
3727 3728
  HA_KEYSEG *keyseg;
  char llbuff[22];
3729
  IO_CACHE new_data_cache; /* For non-quick repair. */
3730 3731 3732 3733
  IO_CACHE_SHARE io_share;
  MARIA_SORT_INFO sort_info;
  ulonglong key_map=share->state.key_map;
  pthread_attr_t thr_attr;
3734 3735
  myf sync_dir= ((share->now_transactional && !share->temporary) ?
                 MY_SYNC_DIR : 0);
3736 3737
  DBUG_ENTER("maria_repair_parallel");

3738
  got_error= 1;
3739
  new_file= -1;
3740
  start_records= info->state->records;
3741 3742 3743
  if (!(param->testflag & T_SILENT))
  {
    printf("- parallel recovering (with sort) MARIA-table '%s'\n",name);
3744
    printf("Data records: %s\n", llstr(start_records, llbuff));
3745 3746
  }

3747 3748
  if (initialize_variables_for_repair(param, &sort_info, &tmp_sort_param, info,
                                      rep_quick))
3749 3750
    goto err;

3751 3752 3753
  new_header_length= ((param->testflag & T_UNPACK) ? 0 :
                      share->pack.header_length);

3754 3755 3756
  /*
    Quick repair (not touching data file, rebuilding indexes):
    {
unknown's avatar
unknown committed
3757
      Read  cache is (MI_CHECK *param)->read_cache using info->dfile.file.
3758 3759 3760 3761 3762 3763
    }

    Non-quick repair (rebuilding data file and indexes):
    {
      Master thread:

unknown's avatar
unknown committed
3764
        Read  cache is (MI_CHECK *param)->read_cache using info->dfile.file.
3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781
        Write cache is (MI_INFO   *info)->rec_cache  using new_file.

      Slave threads:

        Read  cache is new_data_cache synced to master rec_cache.

      The final assignment of the filedescriptor for rec_cache is done
      after the cache creation.

      Don't check file size on new_data_cache, as the resulting file size
      is not known yet.

      As rec_cache and new_data_cache are synced, write_buffer_length is
      used for the read cache 'new_data_cache'. Both start at the same
      position 'new_header_length'.
    }
  */
3782
  DBUG_PRINT("info", ("is quick repair: %d", (int) rep_quick));
3783

3784 3785 3786 3787
  /* Initialize pthread structures before goto err. */
  pthread_mutex_init(&sort_info.mutex, MY_MUTEX_INIT_FAST);
  pthread_cond_init(&sort_info.cond, 0);

3788
  if (!(sort_info.key_block=
3789 3790
	alloc_key_blocks(param, (uint) param->sort_key_blocks,
			 share->base.max_key_block_length)) ||
unknown's avatar
unknown committed
3791
      init_io_cache(&param->read_cache, info->dfile.file,
3792 3793 3794
                    (uint) param->read_buffer_length,
                    READ_CACHE, share->pack.header_length, 1, MYF(MY_WME)) ||
      (!rep_quick &&
unknown's avatar
unknown committed
3795
       (init_io_cache(&info->rec_cache, info->dfile.file,
3796 3797 3798 3799 3800 3801 3802
                      (uint) param->write_buffer_length,
                      WRITE_CACHE, new_header_length, 1,
                      MYF(MY_WME | MY_WAIT_IF_FULL) & param->myf_rw) ||
        init_io_cache(&new_data_cache, -1,
                      (uint) param->write_buffer_length,
                      READ_CACHE, new_header_length, 1,
                      MYF(MY_WME | MY_DONT_CHECK_FILESIZE)))))
3803 3804 3805
    goto err;
  sort_info.key_block_end=sort_info.key_block+param->sort_key_blocks;
  info->opt_flag|=WRITE_CACHE_USED;
unknown's avatar
unknown committed
3806
  info->rec_cache.file= info->dfile.file;         /* for sort_delete_record */
3807 3808 3809 3810

  if (!rep_quick)
  {
    /* Get real path for data file */
unknown's avatar
unknown committed
3811 3812 3813 3814 3815 3816
    if ((new_file= my_create(fn_format(param->temp_filename,
                                       share->data_file_name, "",
                                       DATA_TMP_EXT,
                                       2+4),
                             0,param->tmpfile_createflag,
                             MYF(0))) < 0)
3817 3818 3819 3820 3821
    {
      _ma_check_print_error(param,"Can't create new tempfile: '%s'",
			   param->temp_filename);
      goto err;
    }
unknown's avatar
unknown committed
3822
    if (new_header_length &&
unknown's avatar
unknown committed
3823
        maria_filecopy(param, new_file, info->dfile.file,0L,new_header_length,
unknown's avatar
unknown committed
3824
                       "datafile-header"))
3825 3826
      goto err;
    if (param->testflag & T_UNPACK)
unknown's avatar
unknown committed
3827
      restore_data_file_type(share);
3828 3829 3830 3831 3832 3833 3834 3835 3836 3837
    share->state.dellink= HA_OFFSET_ERROR;
    info->rec_cache.file=new_file;
  }

  info->update= (short) (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED);
  if (!(param->testflag & T_CREATE_MISSING_KEYS))
  {
    /* Clear the pointers to the given rows */
    for (i=0 ; i < share->base.keys ; i++)
      share->state.key_root[i]= HA_OFFSET_ERROR;
unknown's avatar
unknown committed
3838
    share->state.key_del= HA_OFFSET_ERROR;
3839 3840 3841 3842 3843
    info->state->key_file_length=share->base.keystart;
  }
  else
    key_map= ~key_map;				/* Create the missing keys */

3844
  param->read_cache.end_of_file= sort_info.filelength;
3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857

  /*
    +1 below is required hack for parallel repair mode.
    The info->state->records value, that is compared later
    to sort_info.max_records and cannot exceed it, is
    increased in sort_key_write. In maria_repair_by_sort, sort_key_write
    is called after sort_key_read, where the comparison is performed,
    but in parallel mode master thread can call sort_key_write
    before some other repair thread calls sort_key_read.
    Furthermore I'm not even sure +1 would be enough.
    May be sort_info.max_records shold be always set to max value in
    parallel mode.
  */
3858
  sort_info.max_records++;
3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870

  del=info->state->del;

  if (!(sort_param=(MARIA_SORT_PARAM *)
        my_malloc((uint) share->base.keys *
		  (sizeof(MARIA_SORT_PARAM) + share->base.pack_reclength),
		  MYF(MY_ZEROFILL))))
  {
    _ma_check_print_error(param,"Not enough memory for key!");
    goto err;
  }
  total_key_length=0;
3871
  rec_per_key_part= param->new_rec_per_key_part;
3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885
  info->state->records=info->state->del=share->state.split=0;
  info->state->empty=0;

  for (i=key=0, istep=1 ; key < share->base.keys ;
       rec_per_key_part+=sort_param[i].keyinfo->keysegs, i+=istep, key++)
  {
    sort_param[i].key=key;
    sort_param[i].keyinfo=share->keyinfo+key;
    sort_param[i].seg=sort_param[i].keyinfo->seg;
    if (! maria_is_key_active(key_map, key))
    {
      /* Remember old statistics for key */
      memcpy((char*) rec_per_key_part,
	     (char*) (share->state.rec_per_key_part+
3886
		      (uint) (rec_per_key_part - param->new_rec_per_key_part)),
3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909
	     sort_param[i].keyinfo->keysegs*sizeof(*rec_per_key_part));
      istep=0;
      continue;
    }
    istep=1;
    if ((!(param->testflag & T_SILENT)))
      printf ("- Fixing index %d\n",key+1);
    if (sort_param[i].keyinfo->flag & HA_FULLTEXT)
    {
      sort_param[i].key_read=sort_maria_ft_key_read;
      sort_param[i].key_write=sort_maria_ft_key_write;
    }
    else
    {
      sort_param[i].key_read=sort_key_read;
      sort_param[i].key_write=sort_key_write;
    }
    sort_param[i].key_cmp=sort_key_cmp;
    sort_param[i].lock_in_memory=maria_lock_memory;
    sort_param[i].tmpdir=param->tmpdir;
    sort_param[i].sort_info=&sort_info;
    sort_param[i].master=0;
    sort_param[i].fix_datafile=0;
3910
    sort_param[i].calc_checksum= 0;
3911 3912 3913 3914

    sort_param[i].filepos=new_header_length;
    sort_param[i].max_pos=sort_param[i].pos=share->pack.header_length;

3915
    sort_param[i].record= (((uchar *)(sort_param+share->base.keys))+
3916
			   (share->base.pack_reclength * i));
unknown's avatar
unknown committed
3917 3918
    if (_ma_alloc_buffer(&sort_param[i].rec_buff, &sort_param[i].rec_buff_size,
                         share->base.default_rec_buff_size))
3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941
    {
      _ma_check_print_error(param,"Not enough memory!");
      goto err;
    }
    sort_param[i].key_length=share->rec_reflength;
    for (keyseg=sort_param[i].seg; keyseg->type != HA_KEYTYPE_END;
	 keyseg++)
    {
      sort_param[i].key_length+=keyseg->length;
      if (keyseg->flag & HA_SPACE_PACK)
        sort_param[i].key_length+=get_pack_length(keyseg->length);
      if (keyseg->flag & (HA_BLOB_PART | HA_VAR_LENGTH_PART))
        sort_param[i].key_length+=2 + test(keyseg->length >= 127);
      if (keyseg->flag & HA_NULL_PART)
        sort_param[i].key_length++;
    }
    total_key_length+=sort_param[i].key_length;

    if (sort_param[i].keyinfo->flag & HA_FULLTEXT)
    {
      uint ft_max_word_len_for_sort=FT_MAX_WORD_LEN_FOR_SORT*
                                    sort_param[i].keyinfo->seg->charset->mbmaxlen;
      sort_param[i].key_length+=ft_max_word_len_for_sort-HA_FT_MAXBYTELEN;
3942
      init_alloc_root(&sort_param[i].wordroot, FTPARSER_MEMROOT_ALLOC_SIZE, 0);
3943 3944 3945 3946
    }
  }
  sort_info.total_keys=i;
  sort_param[0].master= 1;
3947
  sort_param[0].fix_datafile= ! rep_quick;
3948
  sort_param[0].calc_checksum= test(param->testflag & T_CALC_CHECKSUM);
3949 3950 3951 3952

  sort_info.got_error=0;
  pthread_mutex_lock(&sort_info.mutex);

3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968
  /*
    Initialize the I/O cache share for use with the read caches and, in
    case of non-quick repair, the write cache. When all threads join on
    the cache lock, the writer copies the write cache contents to the
    read caches.
  */
  if (i > 1)
  {
    if (rep_quick)
      init_io_cache_share(&param->read_cache, &io_share, NULL, i);
    else
      init_io_cache_share(&new_data_cache, &io_share, &info->rec_cache, i);
  }
  else
    io_share.total_threads= 0; /* share not used */

3969 3970 3971 3972 3973
  (void) pthread_attr_init(&thr_attr);
  (void) pthread_attr_setdetachstate(&thr_attr,PTHREAD_CREATE_DETACHED);

  for (i=0 ; i < sort_info.total_keys ; i++)
  {
3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986
    /*
      Copy the properly initialized IO_CACHE structure so that every
      thread has its own copy. In quick mode param->read_cache is shared
      for use by all threads. In non-quick mode all threads but the
      first copy the shared new_data_cache, which is synchronized to the
      write cache of the first thread. The first thread copies
      param->read_cache, which is not shared.
    */
    sort_param[i].read_cache= ((rep_quick || !i) ? param->read_cache :
                               new_data_cache);
    DBUG_PRINT("io_cache_share", ("thread: %u  read_cache: 0x%lx",
                                  i, (long) &sort_param[i].read_cache));

3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003
    /*
      two approaches: the same amount of memory for each thread
      or the memory for the same number of keys for each thread...
      In the second one all the threads will fill their sort_buffers
      (and call write_keys) at the same time, putting more stress on i/o.
    */
    sort_param[i].sortbuff_size=
#ifndef USING_SECOND_APPROACH
      param->sort_buffer_length/sort_info.total_keys;
#else
      param->sort_buffer_length*sort_param[i].key_length/total_key_length;
#endif
    if (pthread_create(&sort_param[i].thr, &thr_attr,
		       _ma_thr_find_all_keys,
		       (void *) (sort_param+i)))
    {
      _ma_check_print_error(param,"Cannot start a repair thread");
4004 4005 4006 4007
      /* Cleanup: Detach from the share. Avoid others to be blocked. */
      if (io_share.total_threads)
        remove_io_thread(&sort_param[i].read_cache);
      DBUG_PRINT("error", ("Cannot start a repair thread"));
4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026
      sort_info.got_error=1;
    }
    else
      sort_info.threads_running++;
  }
  (void) pthread_attr_destroy(&thr_attr);

  /* waiting for all threads to finish */
  while (sort_info.threads_running)
    pthread_cond_wait(&sort_info.cond, &sort_info.mutex);
  pthread_mutex_unlock(&sort_info.mutex);

  if ((got_error= _ma_thr_write_keys(sort_param)))
  {
    param->retry_repair=1;
    goto err;
  }
  got_error=1;				/* Assume the following may go wrong */

unknown's avatar
unknown committed
4027 4028 4029
  if (_ma_flush_table_files_before_swap(param, info))
    goto err;

4030 4031
  if (sort_param[0].fix_datafile)
  {
4032
    /*
unknown's avatar
unknown committed
4033
      Append some nulls to the end of a memory mapped file. Destroy the
4034 4035 4036
      write cache. The master thread did already detach from the share
      by remove_io_thread() in sort.c:thr_find_all_keys().
    */
unknown's avatar
unknown committed
4037 4038
    if (maria_write_data_suffix(&sort_info,1) ||
        end_io_cache(&info->rec_cache))
4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052
      goto err;
    if (param->testflag & T_SAFE_REPAIR)
    {
      /* Don't repair if we loosed more than one row */
      if (info->state->records+1 < start_records)
      {
        info->state->records=start_records;
        goto err;
      }
    }
    share->state.state.data_file_length= info->state->data_file_length=
      sort_param->filepos;
    /* Only whole records */
    share->state.version=(ulong) time((time_t*) 0);
4053 4054 4055 4056
    /*
      Exchange the data file descriptor of the table, so that we use the
      new file from now on.
     */
unknown's avatar
unknown committed
4057 4058
    my_close(info->dfile.file, MYF(0));
    info->dfile.file= new_file;
4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072
    share->pack.header_length=(ulong) new_header_length;
  }
  else
    info->state->data_file_length=sort_param->max_pos;

  if (rep_quick && del+sort_info.dupp != info->state->del)
  {
    _ma_check_print_error(param,"Couldn't fix table with quick recovery: Found wrong number of deleted records");
    _ma_check_print_error(param,"Run recovery again without -q");
    param->retry_repair=1;
    param->testflag|=T_RETRY_WITHOUT_QUICK;
    goto err;
  }

4073
  if (rep_quick && (param->testflag & T_FORCE_UNIQUENESS))
4074
  {
4075 4076 4077
    my_off_t skr= (info->state->data_file_length +
                   (sort_info.org_data_file_type == COMPRESSED_RECORD) ?
                   MEMMAP_EXTRA_MARGIN : 0);
4078
#ifdef USE_RELOC
4079
    if (sort_info.org_data_file_type == STATIC_RECORD &&
4080 4081 4082
	skr < share->base.reloc*share->base.min_pack_length)
      skr=share->base.reloc*share->base.min_pack_length;
#endif
unknown's avatar
unknown committed
4083
    if (skr != sort_info.filelength)
unknown's avatar
unknown committed
4084
      if (my_chsize(info->dfile.file, skr, 0, MYF(0)))
4085 4086 4087 4088 4089 4090 4091
	_ma_check_print_warning(param,
			       "Can't change size of datafile,  error: %d",
			       my_errno);
  }
  if (param->testflag & T_CALC_CHECKSUM)
    info->state->checksum=param->glob_crc;

unknown's avatar
unknown committed
4092
  if (my_chsize(share->kfile.file, info->state->key_file_length, 0, MYF(0)))
4093
    _ma_check_print_warning(param,
unknown's avatar
unknown committed
4094 4095
			   "Can't change size of indexfile, error: %d",
                            my_errno);
4096 4097 4098 4099 4100 4101

  if (!(param->testflag & T_SILENT))
  {
    if (start_records != info->state->records)
      printf("Data records: %s\n", llstr(info->state->records,llbuff));
  }
unknown's avatar
unknown committed
4102 4103 4104 4105
  if (sort_info.dupp)
    _ma_check_print_warning(param,
                            "%s records have been removed",
                            llstr(sort_info.dupp,llbuff));
4106 4107 4108 4109 4110 4111
  got_error=0;

  if (&share->state.state != info->state)
    memcpy(&share->state.state, info->state, sizeof(*info->state));

err:
4112 4113 4114 4115 4116
  /*
    Destroy the write cache. The master thread did already detach from
    the share by remove_io_thread() or it was not yet started (if the
    error happend before creating the thread).
  */
4117
  VOID(end_io_cache(&sort_info.new_info->rec_cache));
unknown's avatar
unknown committed
4118 4119
  VOID(end_io_cache(&param->read_cache));
  info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED);
4120
  sort_info.new_info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED);
4121 4122 4123 4124 4125 4126 4127 4128
  /*
    Destroy the new data cache in case of non-quick repair. All slave
    threads did either detach from the share by remove_io_thread()
    already or they were not yet started (if the error happend before
    creating the threads).
  */
  if (!rep_quick)
    VOID(end_io_cache(&new_data_cache));
4129 4130 4131 4132 4133 4134
  if (!got_error)
  {
    /* Replace the actual file with the temporary file */
    if (new_file >= 0)
    {
      my_close(new_file,MYF(0));
unknown's avatar
unknown committed
4135
      info->dfile.file= new_file= -1;
4136
      if (maria_change_to_newfile(share->data_file_name,MARIA_NAME_DEXT,
unknown's avatar
unknown committed
4137
                                  DATA_TMP_EXT,
4138 4139 4140
                                  MYF((param->testflag & T_BACKUP_DATA ?
                                       MY_REDEL_MAKE_BACKUP : 0) |
                                      sync_dir)) ||
4141 4142 4143 4144 4145 4146 4147 4148
	  _ma_open_datafile(info,share,-1))
	got_error=1;
    }
  }
  if (got_error)
  {
    if (! param->error_printed)
      _ma_check_print_error(param,"%d when fixing table",my_errno);
unknown's avatar
unknown committed
4149
    (void)_ma_flush_table_files_before_swap(param, info);
4150 4151 4152
    if (new_file >= 0)
    {
      VOID(my_close(new_file,MYF(0)));
unknown's avatar
unknown committed
4153
      VOID(my_delete(param->temp_filename, MYF(MY_WME)));
unknown's avatar
unknown committed
4154 4155
      if (info->dfile.file == new_file)
	info->dfile.file= -1;
4156 4157 4158 4159 4160
    }
    maria_mark_crashed_on_repair(info);
  }
  else if (key_map == share->state.key_map)
    share->state.changed&= ~STATE_NOT_OPTIMIZED_KEYS;
unknown's avatar
unknown committed
4161
  share->state.changed|= STATE_NOT_SORTED_PAGES;
unknown's avatar
unknown committed
4162 4163 4164
  if (!rep_quick)
    share->state.changed&= ~(STATE_NOT_OPTIMIZED_ROWS | STATE_NOT_ZEROFILLED |
                             STATE_NOT_MOVABLE);
4165 4166 4167 4168

  pthread_cond_destroy (&sort_info.cond);
  pthread_mutex_destroy(&sort_info.mutex);

unknown's avatar
unknown committed
4169 4170 4171
  my_free((uchar*) sort_info.ft_buf, MYF(MY_ALLOW_ZERO_PTR));
  my_free((uchar*) sort_info.key_block,MYF(MY_ALLOW_ZERO_PTR));
  my_free((uchar*) sort_param,MYF(MY_ALLOW_ZERO_PTR));
4172 4173
  my_free(sort_info.buff,MYF(MY_ALLOW_ZERO_PTR));
  if (!got_error && (param->testflag & T_UNPACK))
unknown's avatar
unknown committed
4174
    restore_data_file_type(share);
4175 4176 4177 4178 4179 4180
  DBUG_RETURN(got_error);
#endif /* THREAD */
}

	/* Read next record and return next key */

unknown's avatar
unknown committed
4181
/*
  Fetch the next record and build the sort key for it.

  SYNOPSIS
    sort_key_read()
      sort_param                Information about and for the sort process
      key                       Buffer that receives the generated key

  RETURN
    0           ok; sort_param->real_key_length holds the key length
    <> 0        value returned by sort_get_next_record(), or 1 if the
                record limit was reached or the record could not be written
*/

static int sort_key_read(MARIA_SORT_PARAM *sort_param, uchar *key)
{
  MARIA_SORT_INFO *sinfo= sort_param->sort_info;
  MARIA_HA *handler= sinfo->info;
  int res;
  DBUG_ENTER("sort_key_read");

  res= sort_get_next_record(sort_param);
  if (res)
    DBUG_RETURN(res);

  /* Refuse to go beyond the number of records the sort was sized for */
  if (handler->state->records == sinfo->max_records)
  {
    _ma_check_print_error(sinfo->param,
                          "Key %d - Found too many records; Can't continue",
                          sort_param->key+1);
    DBUG_RETURN(1);
  }

  if (_ma_sort_write_record(sort_param))
    DBUG_RETURN(1);

  /* Stored key length is the record reference plus the key made here */
  sort_param->real_key_length= (handler->s->rec_reflength +
                                _ma_make_key(handler, sort_param->key, key,
                                             sort_param->record,
                                             sort_param->current_filepos));
#ifdef HAVE_purify
  /* Clear the unused tail of the key buffer */
  bzero(key + sort_param->real_key_length,
        (sort_param->key_length - sort_param->real_key_length));
#endif
  DBUG_RETURN(0);
} /* sort_key_read */

unknown's avatar
unknown committed
4211

unknown's avatar
unknown committed
4212
/*
  Return the next fulltext key for the sort.

  SYNOPSIS
    sort_maria_ft_key_read()
      sort_param                Information about and for the sort process
      key                       Buffer that receives the generated key

  NOTES
    One record yields a list of words. While sort_param->wordlist is set,
    each call emits the key for the next word of that list. When the list
    is used up, the next call reads records until one is found whose
    parsed word list is not empty.

  RETURN
    0           ok; sort_param->real_key_length holds the key length
    <> 0        value returned by sort_get_next_record() or
                _ma_sort_write_record(), or 1 if the record could not
                be parsed
*/

static int sort_maria_ft_key_read(MARIA_SORT_PARAM *sort_param, uchar *key)
{
  int res= 0;
  MARIA_SORT_INFO *sinfo= sort_param->sort_info;
  MARIA_HA *handler= sinfo->info;
  FT_WORD *word= 0;
  DBUG_ENTER("sort_maria_ft_key_read");

  if (sort_param->wordlist)
  {
    /* Continue with the word list of the current record */
    word= (FT_WORD*) (sort_param->wordptr);
  }
  else
  {
    /* Read records until one produces at least one word */
    do
    {
      free_root(&sort_param->wordroot, MYF(MY_MARK_BLOCKS_FREE));
      if ((res= sort_get_next_record(sort_param)))
        DBUG_RETURN(res);
      if ((res= _ma_sort_write_record(sort_param)))
        DBUG_RETURN(res);
      word= _ma_ft_parserecord(handler, sort_param->key, sort_param->record,
                               &sort_param->wordroot);
      if (!word)
        DBUG_RETURN(1);
    } while (!word->pos);
    sort_param->wordptr= sort_param->wordlist= word;
  }

  sort_param->real_key_length= (handler->s->rec_reflength +
                                _ma_ft_make_key(handler, sort_param->key,
                                                key, word,
                                                sort_param->current_filepos));
  word++;                                       /* Step to the next word */
#ifdef HAVE_purify
  if (sort_param->key_length > sort_param->real_key_length)
    bzero(key + sort_param->real_key_length,
          (sort_param->key_length - sort_param->real_key_length));
#endif
  if (word->pos)
    sort_param->wordptr= (void*) word;
  else
  {
    /* Word list is used up; the next call must read a new record */
    free_root(&sort_param->wordroot, MYF(MY_MARK_BLOCKS_FREE));
    sort_param->wordlist= 0;
  }

  DBUG_RETURN(res);
} /* sort_maria_ft_key_read */


4265 4266 4267 4268 4269 4270 4271
/*
  Read next record from file using parameters in sort_info.

  SYNOPSIS
    sort_get_next_record()
      sort_param                Information about and for the sort process

4272
  NOTES
4273 4274
    Dynamic Records With Non-Quick Parallel Repair

4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287
    For non-quick parallel repair we use a synchronized read/write
    cache. This means that one thread is the master who fixes the data
    file by reading each record from the old data file and writing it
    to the new data file. By doing this the records in the new data
    file are written contiguously. Whenever the write buffer is full,
    it is copied to the read buffer. The slaves read from the read
    buffer, which is not associated with a file. Thus read_cache.file
    is -1. When using _ma_read_cache(), the slaves must always set
    flag to READING_NEXT so that the function never tries to read from
    file. This is safe because the records are contiguous. There is no
    need to read outside the cache. This condition is evaluated in the
    variable 'parallel_flag' for quick reference. read_cache.file must
    be >= 0 in every other case.
4288 4289 4290 4291

  RETURN
    -1          end of file
    0           ok
4292
                sort_param->current_filepos points to record position.
4293
                sort_param->record contains record
4294
                sort_param->max_pos contains position to last byte read
4295 4296
    > 0         error
*/
4297 4298 4299 4300

static int sort_get_next_record(MARIA_SORT_PARAM *sort_param)
{
  int searching;
4301
  int parallel_flag;
4302 4303 4304 4305 4306 4307
  uint found_record,b_type,left_length;
  my_off_t pos;
  MARIA_BLOCK_INFO block_info;
  MARIA_SORT_INFO *sort_info=sort_param->sort_info;
  HA_CHECK *param=sort_info->param;
  MARIA_HA *info=sort_info->info;
4308
  MARIA_SHARE *share= info->s;
4309 4310 4311 4312 4313 4314
  char llbuff[22],llbuff2[22];
  DBUG_ENTER("sort_get_next_record");

  if (*_ma_killed_ptr(param))
    DBUG_RETURN(1);

4315
  switch (sort_info->org_data_file_type) {
unknown's avatar
unknown committed
4316
  case BLOCK_RECORD:
4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329
  {
    for (;;)
    {
      int flag;

      if (info != sort_info->new_info)
      {
        /* Safe scanning */
        flag= _ma_safe_scan_block_record(sort_info, info,
                                         sort_param->record);
      }
      else
      {
unknown's avatar
unknown committed
4330 4331 4332 4333
        /*
          Scan on clean table.
          It requires a reliable data_file_length so we set it.
        */
unknown's avatar
unknown committed
4334
        info->state->data_file_length= sort_info->filelength;
4335 4336 4337 4338 4339 4340 4341 4342
        flag= _ma_scan_block_record(info, sort_param->record,
                                    info->cur_row.nextpos, 1);
      }
      if (!flag)
      {
	if (sort_param->calc_checksum)
        {
          ha_checksum checksum;
4343 4344
          checksum= (*share->calc_check_checksum)(info, sort_param->record);
          if (share->calc_checksum &&
4345 4346 4347 4348
              info->cur_row.checksum != (checksum & 255))
          {
            if (param->testflag & T_VERBOSE)
            {
4349
              record_pos_to_txt(info, info->cur_row.lastpos, llbuff);
4350 4351 4352 4353 4354 4355 4356 4357 4358
              _ma_check_print_info(param,
                                   "Found record with wrong checksum at %s",
                                   llbuff);
            }
            continue;
          }
          info->cur_row.checksum= checksum;
	  param->glob_crc+= checksum;
        }
4359 4360
        sort_param->start_recpos= sort_param->current_filepos=
          info->cur_row.lastpos;
4361 4362 4363 4364
        DBUG_RETURN(0);
      }
      if (flag == HA_ERR_END_OF_FILE)
      {
4365
        sort_param->max_pos= info->state->data_file_length;
4366 4367 4368 4369 4370 4371
        DBUG_RETURN(-1);
      }
      /* Retry only if wrong record, not if disk error */
      if (flag != HA_ERR_WRONG_IN_RECORD)
        DBUG_RETURN(flag);
    }
unknown's avatar
unknown committed
4372
    break;
4373
  }
4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388
  case STATIC_RECORD:
    for (;;)
    {
      if (my_b_read(&sort_param->read_cache,sort_param->record,
		    share->base.pack_reclength))
      {
	if (sort_param->read_cache.error)
	  param->out_flag |= O_DATA_LOST;
        param->retry_repair=1;
        param->testflag|=T_RETRY_WITHOUT_QUICK;
	DBUG_RETURN(-1);
      }
      sort_param->start_recpos=sort_param->pos;
      if (!sort_param->fix_datafile)
      {
4389
	sort_param->current_filepos= sort_param->pos;
4390 4391 4392 4393 4394 4395
        if (sort_param->master)
	  share->state.split++;
      }
      sort_param->max_pos=(sort_param->pos+=share->base.pack_reclength);
      if (*sort_param->record)
      {
4396
	if (sort_param->calc_checksum)
unknown's avatar
unknown committed
4397
	  param->glob_crc+= (info->cur_row.checksum=
4398 4399 4400 4401 4402 4403 4404 4405 4406 4407
			     _ma_static_checksum(info,sort_param->record));
	DBUG_RETURN(0);
      }
      if (!sort_param->fix_datafile && sort_param->master)
      {
	info->state->del++;
	info->state->empty+=share->base.pack_reclength;
      }
    }
  case DYNAMIC_RECORD:
unknown's avatar
unknown committed
4408
  {
unknown's avatar
unknown committed
4409
    uchar *to;
4410
    ha_checksum checksum= 0;
unknown's avatar
unknown committed
4411
    LINT_INIT(to);
4412

4413 4414
    pos=sort_param->pos;
    searching=(sort_param->fix_datafile && (param->testflag & T_EXTEND));
4415
    parallel_flag= (sort_param->read_cache.file < 0) ? READING_NEXT : 0;
4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442
    for (;;)
    {
      found_record=block_info.second_read= 0;
      left_length=1;
      if (searching)
      {
	pos=MY_ALIGN(pos,MARIA_DYN_ALIGN_SIZE);
        param->testflag|=T_RETRY_WITHOUT_QUICK;
	sort_param->start_recpos=pos;
      }
      do
      {
	if (pos > sort_param->max_pos)
	  sort_param->max_pos=pos;
	if (pos & (MARIA_DYN_ALIGN_SIZE-1))
	{
	  if ((param->testflag & T_VERBOSE) || searching == 0)
	    _ma_check_print_info(param,"Wrong aligned block at %s",
				llstr(pos,llbuff));
	  if (searching)
	    goto try_next;
	}
	if (found_record && pos == param->search_after_block)
	  _ma_check_print_info(param,"Block: %s used by record at %s",
		     llstr(param->search_after_block,llbuff),
		     llstr(sort_param->start_recpos,llbuff2));
	if (_ma_read_cache(&sort_param->read_cache,
unknown's avatar
unknown committed
4443
                           (uchar*) block_info.header,pos,
4444 4445
			   MARIA_BLOCK_INFO_HEADER_LENGTH,
			   (! found_record ? READING_NEXT : 0) |
4446
			   parallel_flag | READING_HEADER))
4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498
	{
	  if (found_record)
	  {
	    _ma_check_print_info(param,
				"Can't read whole record at %s (errno: %d)",
				llstr(sort_param->start_recpos,llbuff),errno);
	    goto try_next;
	  }
	  DBUG_RETURN(-1);
	}
	if (searching && ! sort_param->fix_datafile)
	{
	  param->error_printed=1;
          param->retry_repair=1;
          param->testflag|=T_RETRY_WITHOUT_QUICK;
	  DBUG_RETURN(1);	/* Something wrong with data */
	}
	b_type= _ma_get_block_info(&block_info,-1,pos);
	if ((b_type & (BLOCK_ERROR | BLOCK_FATAL_ERROR)) ||
	   ((b_type & BLOCK_FIRST) &&
	     (block_info.rec_len < (uint) share->base.min_pack_length ||
	      block_info.rec_len > (uint) share->base.max_pack_length)))
	{
	  uint i;
	  if (param->testflag & T_VERBOSE || searching == 0)
	    _ma_check_print_info(param,
				"Wrong bytesec: %3d-%3d-%3d at %10s; Skipped",
		       block_info.header[0],block_info.header[1],
		       block_info.header[2],llstr(pos,llbuff));
	  if (found_record)
	    goto try_next;
	  block_info.second_read=0;
	  searching=1;
	  /* Search after block in read header string */
	  for (i=MARIA_DYN_ALIGN_SIZE ;
	       i < MARIA_BLOCK_INFO_HEADER_LENGTH ;
	       i+= MARIA_DYN_ALIGN_SIZE)
	    if (block_info.header[i] >= 1 &&
		block_info.header[i] <= MARIA_MAX_DYN_HEADER_BYTE)
	      break;
	  pos+=(ulong) i;
	  sort_param->start_recpos=pos;
	  continue;
	}
	if (b_type & BLOCK_DELETED)
	{
	  bool error=0;
	  if (block_info.block_len+ (uint) (block_info.filepos-pos) <
	      share->base.min_block_length)
	  {
	    if (!searching)
	      _ma_check_print_info(param,
4499 4500 4501
                                   "Deleted block with impossible length %lu "
                                   "at %s",
                                   block_info.block_len,llstr(pos,llbuff));
4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538
	    error=1;
	  }
	  else
	  {
	    if ((block_info.next_filepos != HA_OFFSET_ERROR &&
		 block_info.next_filepos >=
		 info->state->data_file_length) ||
		(block_info.prev_filepos != HA_OFFSET_ERROR &&
		 block_info.prev_filepos >= info->state->data_file_length))
	    {
	      if (!searching)
		_ma_check_print_info(param,
				    "Delete link points outside datafile at %s",
				    llstr(pos,llbuff));
	      error=1;
	    }
	  }
	  if (error)
	  {
	    if (found_record)
	      goto try_next;
	    searching=1;
	    pos+= MARIA_DYN_ALIGN_SIZE;
	    sort_param->start_recpos=pos;
	    block_info.second_read=0;
	    continue;
	  }
	}
	else
	{
	  if (block_info.block_len+ (uint) (block_info.filepos-pos) <
	      share->base.min_block_length ||
	      block_info.block_len > (uint) share->base.max_pack_length+
	      MARIA_SPLIT_LENGTH)
	  {
	    if (!searching)
	      _ma_check_print_info(param,
4539 4540 4541
                                   "Found block with impossible length %lu "
                                   "at %s; Skipped",
                                   block_info.block_len+
4542
                                   (uint) (block_info.filepos-pos),
4543
                                   llstr(pos,llbuff));
4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 4581
	    if (found_record)
	      goto try_next;
	    searching=1;
	    pos+= MARIA_DYN_ALIGN_SIZE;
	    sort_param->start_recpos=pos;
	    block_info.second_read=0;
	    continue;
	  }
	}
	if (b_type & (BLOCK_DELETED | BLOCK_SYNC_ERROR))
	{
          if (!sort_param->fix_datafile && sort_param->master &&
              (b_type & BLOCK_DELETED))
	  {
	    info->state->empty+=block_info.block_len;
	    info->state->del++;
	    share->state.split++;
	  }
	  if (found_record)
	    goto try_next;
	  if (searching)
	  {
	    pos+=MARIA_DYN_ALIGN_SIZE;
	    sort_param->start_recpos=pos;
	  }
	  else
	    pos=block_info.filepos+block_info.block_len;
	  block_info.second_read=0;
	  continue;
	}

	if (!sort_param->fix_datafile && sort_param->master)
	  share->state.split++;
	if (! found_record++)
	{
	  sort_param->find_length=left_length=block_info.rec_len;
	  sort_param->start_recpos=pos;
	  if (!sort_param->fix_datafile)
4582
	    sort_param->current_filepos= sort_param->start_recpos;
4583 4584 4585 4586 4587 4588
	  if (sort_param->fix_datafile && (param->testflag & T_EXTEND))
	    sort_param->pos=block_info.filepos+1;
	  else
	    sort_param->pos=block_info.filepos+block_info.block_len;
	  if (share->base.blobs)
	  {
unknown's avatar
unknown committed
4589 4590 4591
	    if (_ma_alloc_buffer(&sort_param->rec_buff,
                                 &sort_param->rec_buff_size,
                                 block_info.rec_len +
4592
                                 share->base.extra_rec_buff_size))
4593

4594 4595 4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608 4609 4610
	    {
	      if (param->max_record_length >= block_info.rec_len)
	      {
		_ma_check_print_error(param,"Not enough memory for blob at %s (need %lu)",
				     llstr(sort_param->start_recpos,llbuff),
				     (ulong) block_info.rec_len);
		DBUG_RETURN(1);
	      }
	      else
	      {
		_ma_check_print_info(param,"Not enough memory for blob at %s (need %lu); Row skipped",
				    llstr(sort_param->start_recpos,llbuff),
				    (ulong) block_info.rec_len);
		goto try_next;
	      }
	    }
	  }
unknown's avatar
unknown committed
4611
          to= sort_param->rec_buff;
4612 4613 4614 4615
	}
	if (left_length < block_info.data_len || ! block_info.data_len)
	{
	  _ma_check_print_info(param,
unknown's avatar
unknown committed
4616 4617 4618
			      "Found block with too small length at %s; "
                               "Skipped",
                               llstr(sort_param->start_recpos,llbuff));
4619 4620 4621 4622 4623 4624
	  goto try_next;
	}
	if (block_info.filepos + block_info.data_len >
	    sort_param->read_cache.end_of_file)
	{
	  _ma_check_print_info(param,
unknown's avatar
unknown committed
4625 4626 4627
			      "Found block that points outside data file "
                               "at %s",
                               llstr(sort_param->start_recpos,llbuff));
4628 4629
	  goto try_next;
	}
4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654
        /*
          Copy information that is already read. Avoid accessing data
          below the cache start. This could happen if the header
          stretched over the end of the previous buffer contents.
        */
        {
          uint header_len= (uint) (block_info.filepos - pos);
          uint prefetch_len= (MARIA_BLOCK_INFO_HEADER_LENGTH - header_len);

          if (prefetch_len > block_info.data_len)
            prefetch_len= block_info.data_len;
          if (prefetch_len)
          {
            memcpy(to, block_info.header + header_len, prefetch_len);
            block_info.filepos+= prefetch_len;
            block_info.data_len-= prefetch_len;
            left_length-= prefetch_len;
            to+= prefetch_len;
          }
        }
        if (block_info.data_len &&
            _ma_read_cache(&sort_param->read_cache,to,block_info.filepos,
                           block_info.data_len,
                           (found_record == 1 ? READING_NEXT : 0) |
                           parallel_flag))
4655 4656 4657 4658 4659 4660 4661 4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683
	{
	  _ma_check_print_info(param,
			      "Read error for block at: %s (error: %d); Skipped",
			      llstr(block_info.filepos,llbuff),my_errno);
	  goto try_next;
	}
	left_length-=block_info.data_len;
	to+=block_info.data_len;
	pos=block_info.next_filepos;
	if (pos == HA_OFFSET_ERROR && left_length)
	{
	  _ma_check_print_info(param,"Wrong block with wrong total length starting at %s",
			      llstr(sort_param->start_recpos,llbuff));
	  goto try_next;
	}
	if (pos + MARIA_BLOCK_INFO_HEADER_LENGTH > sort_param->read_cache.end_of_file)
	{
	  _ma_check_print_info(param,"Found link that points at %s (outside data file) at %s",
			      llstr(pos,llbuff2),
			      llstr(sort_param->start_recpos,llbuff));
	  goto try_next;
	}
      } while (left_length);

      if (_ma_rec_unpack(info,sort_param->record,sort_param->rec_buff,
			 sort_param->find_length) != MY_FILE_ERROR)
      {
	if (sort_param->read_cache.error < 0)
	  DBUG_RETURN(1);
4684
	if (sort_param->calc_checksum)
4685
	  checksum= (share->calc_check_checksum)(info, sort_param->record);
4686 4687 4688 4689 4690
	if ((param->testflag & (T_EXTEND | T_REP)) || searching)
	{
	  if (_ma_rec_check(info, sort_param->record, sort_param->rec_buff,
                            sort_param->find_length,
                            (param->testflag & T_QUICK) &&
4691
                            sort_param->calc_checksum &&
4692
                            test(share->calc_checksum), checksum))
4693 4694 4695 4696 4697 4698
	  {
	    _ma_check_print_info(param,"Found wrong packed record at %s",
				llstr(sort_param->start_recpos,llbuff));
	    goto try_next;
	  }
	}
4699
	if (sort_param->calc_checksum)
4700
	  param->glob_crc+= checksum;
4701 4702 4703 4704 4705 4706 4707 4708 4709 4710
	DBUG_RETURN(0);
      }
      if (!searching)
        _ma_check_print_info(param,"Key %d - Found wrong stored record at %s",
                            sort_param->key+1,
                            llstr(sort_param->start_recpos,llbuff));
    try_next:
      pos=(sort_param->start_recpos+=MARIA_DYN_ALIGN_SIZE);
      searching=1;
    }
unknown's avatar
unknown committed
4711
  }
4712 4713 4714
  case COMPRESSED_RECORD:
    for (searching=0 ;; searching=1, sort_param->pos++)
    {
unknown's avatar
unknown committed
4715
      if (_ma_read_cache(&sort_param->read_cache,(uchar*) block_info.header,
4716 4717 4718 4719 4720 4721 4722 4723 4724 4725 4726
			 sort_param->pos,
			 share->pack.ref_length,READING_NEXT))
	DBUG_RETURN(-1);
      if (searching && ! sort_param->fix_datafile)
      {
	param->error_printed=1;
        param->retry_repair=1;
        param->testflag|=T_RETRY_WITHOUT_QUICK;
	DBUG_RETURN(1);		/* Something wrong with data */
      }
      sort_param->start_recpos=sort_param->pos;
4727
      if (_ma_pack_get_block_info(info, &sort_param->bit_buff, &block_info,
unknown's avatar
unknown committed
4728 4729 4730
                                  &sort_param->rec_buff,
                                  &sort_param->rec_buff_size, -1,
                                  sort_param->pos))
4731 4732 4733 4734 4735 4736 4737 4738 4739
	DBUG_RETURN(-1);
      if (!block_info.rec_len &&
	  sort_param->pos + MEMMAP_EXTRA_MARGIN ==
	  sort_param->read_cache.end_of_file)
	DBUG_RETURN(-1);
      if (block_info.rec_len < (uint) share->min_pack_length ||
	  block_info.rec_len > (uint) share->max_pack_length)
      {
	if (! searching)
4740 4741 4742 4743 4744
	  _ma_check_print_info(param,
                               "Found block with wrong recordlength: %lu "
                               "at %s\n",
                               block_info.rec_len,
                               llstr(sort_param->pos,llbuff));
4745 4746
	continue;
      }
unknown's avatar
unknown committed
4747
      if (_ma_read_cache(&sort_param->read_cache,(uchar*) sort_param->rec_buff,
4748 4749 4750 4751 4752 4753 4754 4755
			 block_info.filepos, block_info.rec_len,
			 READING_NEXT))
      {
	if (! searching)
	  _ma_check_print_info(param,"Couldn't read whole record from %s",
			      llstr(sort_param->pos,llbuff));
	continue;
      }
4756 4757
#ifdef HAVE_purify
      bzero(sort_param->rec_buff + block_info.rec_len,
4758
            share->base.extra_rec_buff_size);
4759
#endif
4760 4761
      if (_ma_pack_rec_unpack(info, &sort_param->bit_buff, sort_param->record,
                              sort_param->rec_buff, block_info.rec_len))
4762 4763 4764 4765 4766 4767 4768 4769
      {
	if (! searching)
	  _ma_check_print_info(param,"Found wrong record at %s",
			      llstr(sort_param->pos,llbuff));
	continue;
      }
      if (!sort_param->fix_datafile)
      {
4770
	sort_param->current_filepos= sort_param->pos;
4771 4772 4773
        if (sort_param->master)
	  share->state.split++;
      }
4774 4775
      sort_param->max_pos= (sort_param->pos=block_info.filepos+
                            block_info.rec_len);
4776
      info->packed_length=block_info.rec_len;
unknown's avatar
unknown committed
4777

4778
      if (sort_param->calc_checksum)
unknown's avatar
unknown committed
4779
      {
4780
        info->cur_row.checksum= (*share->calc_check_checksum)(info,
4781 4782
                                                                sort_param->
                                                                record);
unknown's avatar
unknown committed
4783 4784
	param->glob_crc+= info->cur_row.checksum;
      }
4785 4786 4787 4788 4789 4790 4791
      DBUG_RETURN(0);
    }
  }
  DBUG_RETURN(1);		/* Impossible */
}


4792 4793
/**
   @brief Write record to new file.
4794

4795 4796
   @fn    _ma_sort_write_record()
   @param sort_param                Sort parameters.
4797

4798 4799
   @note
   This is only called by a master thread if parallel repair is used.
4800

4801 4802 4803 4804 4805 4806 4807
   @return
   @retval  0   OK
                sort_param->current_filepos points to inserted record for
                block_records and to the place for the next record for
                other row types.
                sort_param->filepos points to end of file
  @retval   1   Error
4808
*/
4809 4810 4811 4812 4813 4814

int _ma_sort_write_record(MARIA_SORT_PARAM *sort_param)
{
  int flag;
  uint length;
  ulong block_length,reclength;
unknown's avatar
unknown committed
4815 4816
  uchar *from;
  uchar block_buff[8];
4817
  MARIA_SORT_INFO *sort_info=sort_param->sort_info;
4818 4819
  HA_CHECK *param= sort_info->param;
  MARIA_HA *info= sort_info->new_info;
4820
  MARIA_SHARE *share= info->s;
4821 4822 4823 4824
  DBUG_ENTER("_ma_sort_write_record");

  if (sort_param->fix_datafile)
  {
4825
    sort_param->current_filepos= sort_param->filepos;
4826
    switch (sort_info->new_data_file_type) {
unknown's avatar
unknown committed
4827
    case BLOCK_RECORD:
4828 4829
      if ((sort_param->current_filepos=
           (*share->write_record_init)(info, sort_param->record)) ==
4830 4831
          HA_OFFSET_ERROR)
        DBUG_RETURN(1);
4832 4833
      /* Pointer to end of file */
      sort_param->filepos= info->state->data_file_length;
unknown's avatar
unknown committed
4834
      break;
4835 4836 4837 4838 4839 4840 4841 4842
    case STATIC_RECORD:
      if (my_b_write(&info->rec_cache,sort_param->record,
		     share->base.pack_reclength))
      {
	_ma_check_print_error(param,"%d when writing to datafile",my_errno);
	DBUG_RETURN(1);
      }
      sort_param->filepos+=share->base.pack_reclength;
4843
      share->state.split++;
4844 4845 4846 4847 4848 4849 4850
      break;
    case DYNAMIC_RECORD:
      if (! info->blobs)
	from=sort_param->rec_buff;
      else
      {
	/* must be sure that local buffer is big enough */
4851
	reclength=share->base.pack_reclength+
4852 4853 4854 4855 4856 4857 4858 4859 4860 4861 4862 4863 4864
	  _ma_calc_total_blob_length(info,sort_param->record)+
	  ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER)+MARIA_SPLIT_LENGTH+
	  MARIA_DYN_DELETE_BLOCK_HEADER;
	if (sort_info->buff_length < reclength)
	{
	  if (!(sort_info->buff=my_realloc(sort_info->buff, (uint) reclength,
					   MYF(MY_FREE_ON_ERROR |
					       MY_ALLOW_ZERO_PTR))))
	    DBUG_RETURN(1);
	  sort_info->buff_length=reclength;
	}
	from=sort_info->buff+ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER);
      }
unknown's avatar
unknown committed
4865
      /* We can use info->checksum here as only one thread calls this */
4866
      info->cur_row.checksum= (*share->calc_check_checksum)(info,
4867 4868
                                                              sort_param->
                                                              record);
4869 4870 4871 4872 4873 4874 4875 4876 4877 4878 4879 4880 4881 4882 4883 4884 4885 4886 4887 4888
      reclength= _ma_rec_pack(info,from,sort_param->record);
      flag=0;

      do
      {
	block_length=reclength+ 3 + test(reclength >= (65520-3));
	if (block_length < share->base.min_block_length)
	  block_length=share->base.min_block_length;
	info->update|=HA_STATE_WRITE_AT_END;
	block_length=MY_ALIGN(block_length,MARIA_DYN_ALIGN_SIZE);
	if (block_length > MARIA_MAX_BLOCK_LENGTH)
	  block_length=MARIA_MAX_BLOCK_LENGTH;
	if (_ma_write_part_record(info,0L,block_length,
				  sort_param->filepos+block_length,
				  &from,&reclength,&flag))
	{
	  _ma_check_print_error(param,"%d when writing to datafile",my_errno);
	  DBUG_RETURN(1);
	}
	sort_param->filepos+=block_length;
4889
	share->state.split++;
4890 4891 4892 4893 4894 4895
      } while (reclength);
      break;
    case COMPRESSED_RECORD:
      reclength=info->packed_length;
      length= _ma_save_pack_length((uint) share->pack.version, block_buff,
                               reclength);
4896
      if (share->base.blobs)
4897 4898 4899
	length+= _ma_save_pack_length((uint) share->pack.version,
	                          block_buff + length, info->blob_length);
      if (my_b_write(&info->rec_cache,block_buff,length) ||
unknown's avatar
unknown committed
4900
	  my_b_write(&info->rec_cache,(uchar*) sort_param->rec_buff,reclength))
4901 4902 4903 4904 4905
      {
	_ma_check_print_error(param,"%d when writing to datafile",my_errno);
	DBUG_RETURN(1);
      }
      sort_param->filepos+=reclength+length;
4906
      share->state.split++;
4907 4908 4909 4910 4911 4912 4913 4914 4915 4916 4917 4918 4919 4920 4921 4922 4923 4924
      break;
    }
  }
  if (sort_param->master)
  {
    info->state->records++;
    if ((param->testflag & T_WRITE_LOOP) &&
        (info->state->records % WRITE_COUNT) == 0)
    {
      char llbuff[22];
      printf("%s\r", llstr(info->state->records,llbuff));
      VOID(fflush(stdout));
    }
  }
  DBUG_RETURN(0);
} /* _ma_sort_write_record */


4925
/* Compare two keys from _ma_create_index_by_sort */
4926 4927 4928 4929 4930 4931 4932 4933 4934 4935

/*
  Comparison callback for two sorted-key slots.

  a and b each point to a slot holding a pointer to a key (uchar *).
  The keys are compared in full with ha_key_cmp() using SEARCH_SAME and
  the key segments in sort_param->seg; the result of ha_key_cmp() is
  returned unchanged.
*/
static int sort_key_cmp(MARIA_SORT_PARAM *sort_param, const void *a,
			const void *b)
{
  uint diff_pos[2];                     /* Filled by ha_key_cmp; ignored */
  uchar *first_key=  *((uchar**) a);
  uchar *second_key= *((uchar**) b);

  return ha_key_cmp(sort_param->seg, first_key, second_key,
                    USE_WHOLE_KEY, SEARCH_SAME, diff_pos);
} /* sort_key_cmp */


unknown's avatar
unknown committed
4936
/*
  Add one key to the index that is being built.

  SYNOPSIS
    sort_key_write()
    sort_param          Sort parameters
    a                   Key to write

  DESCRIPTION
    If a previous key exists in the first key block, the new key is
    compared with it and sort_param->unique[] is updated at the position
    reported by the comparison (or by the NULL-aware statistics helpers,
    depending on param->stats_method).

    If the key definition has HA_NOSAME and the key equals the previous
    one, a warning is printed, T_RETRY_WITHOUT_QUICK is set and the row is
    removed through sort_delete_record() instead of the key being
    inserted.

  RETURN
    Result of sort_delete_record() for a duplicate, else the result of
    sort_insert_key(). In debug builds 1 is returned if the key sorts
    before the previous key.
*/
static int sort_key_write(MARIA_SORT_PARAM *sort_param, const uchar *a)
{
  uint diff_pos[2];
  char llbuff[22],llbuff2[22];
  MARIA_SORT_INFO *sort_info=sort_param->sort_info;
  HA_CHECK *param= sort_info->param;
  int cmp;

  if (sort_info->key_block->inited)
  {
    cmp=ha_key_cmp(sort_param->seg, (uchar*) sort_info->key_block->lastkey,
                   (uchar*) a, USE_WHOLE_KEY,SEARCH_FIND | SEARCH_UPDATE,
                   diff_pos);
    /* Recompute diff_pos for the statistics method in use; cmp is kept */
    if (param->stats_method == MI_STATS_METHOD_NULLS_NOT_EQUAL)
      ha_key_cmp(sort_param->seg, (uchar*) sort_info->key_block->lastkey,
                 (uchar*) a, USE_WHOLE_KEY,
                 SEARCH_FIND | SEARCH_NULL_ARE_NOT_EQUAL, diff_pos);
    else if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS)
    {
      diff_pos[0]= maria_collect_stats_nonulls_next(sort_param->seg,
                                                 sort_param->notnull,
                                                 sort_info->key_block->lastkey,
                                                 a);
    }
    sort_param->unique[diff_pos[0]-1]++;
  }
  else
  {
    /* First key: there is nothing to compare against */
    cmp= -1;
    if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS)
      maria_collect_stats_nonulls_first(sort_param->seg, sort_param->notnull,
                                        a);
  }
  if ((sort_param->keyinfo->flag & HA_NOSAME) && cmp == 0)
  {
    sort_info->dupp++;
    sort_info->info->cur_row.lastpos= get_record_for_key(sort_info->info,
                                                         sort_param->keyinfo,
                                                         a);
    _ma_check_print_warning(param,
                           "Duplicate key for record at %10s against record at %10s",
                           llstr(sort_info->info->cur_row.lastpos, llbuff),
                           llstr(get_record_for_key(sort_info->info,
                                                    sort_param->keyinfo,
                                                    sort_info->key_block->
                                                    lastkey),
                                 llbuff2));
    param->testflag|=T_RETRY_WITHOUT_QUICK;
    if (sort_info->param->testflag & T_VERBOSE)
      _ma_print_key(stdout,sort_param->seg, a, USE_WHOLE_KEY);
    return (sort_delete_record(sort_param));
  }
#ifndef DBUG_OFF
  if (cmp > 0)
  {
    _ma_check_print_error(param,
                         "Internal error: Keys are not in order from sort");
    return(1);
  }
#endif
  return (sort_insert_key(sort_param, sort_info->key_block,
                          a, HA_OFFSET_ERROR));
} /* sort_key_write */

unknown's avatar
unknown committed
5000

5001 5002 5003 5004 5005 5006 5007 5008
/*
  Flush the word buffered in sort_info->ft_buf to the index.

  SYNOPSIS
    _ma_sort_ft_buf_flush()
    sort_param          Sort parameters

  DESCRIPTION
    If ft_buf->buf is set, the values collected for the word are still in
    the buffer: the stored key is inserted once per buffered value, each
    value being copied in turn over the value part of ft_buf->lastkey.

    If ft_buf->buf is not set, the values were written to a second-level
    tree. Its pending blocks are flushed, the negated value count and the
    root of that tree are stored in the value part of ft_buf->lastkey,
    the first-level key block and key definition are restored, and the
    key is inserted in the first-level tree.

  RETURN
    0      ok
    != 0   error from _ma_flush_pending_blocks() or sort_insert_key()
*/
int _ma_sort_ft_buf_flush(MARIA_SORT_PARAM *sort_param)
{
  MARIA_SORT_INFO *sort_info=sort_param->sort_info;
  SORT_KEY_BLOCKS *key_block=sort_info->key_block;
  MARIA_SHARE *share=sort_info->info->s;
  uint val_off, val_len;
  int error;
  SORT_FT_BUF *maria_ft_buf=sort_info->ft_buf;
  uchar *from, *to;

  val_len=share->ft2_keyinfo.keylength;
  get_key_full_length_rdonly(val_off, maria_ft_buf->lastkey);
  to= maria_ft_buf->lastkey+val_off;

  if (maria_ft_buf->buf)
  {
    /* flushing first-level tree */
    error= sort_insert_key(sort_param,key_block,maria_ft_buf->lastkey,
                           HA_OFFSET_ERROR);
    for (from=to+val_len;
         !error && from < maria_ft_buf->buf;
         from+= val_len)
    {
      memcpy(to, from, val_len);
      error= sort_insert_key(sort_param,key_block,maria_ft_buf->lastkey,
                             HA_OFFSET_ERROR);
    }
    return error;
  }
  /* flushing second-level tree keyblocks */
  error=_ma_flush_pending_blocks(sort_param);
  /* updating lastkey with second-level tree info */
  ft_intXstore(maria_ft_buf->lastkey+val_off, -maria_ft_buf->count);
  _ma_dpointer(sort_info->info, maria_ft_buf->lastkey+val_off+HA_FT_WLEN,
      share->state.key_root[sort_param->key]);
  /* restoring first level tree data in sort_info/sort_param */
  sort_info->key_block=sort_info->key_block_end- sort_info->param->sort_key_blocks;
  sort_param->keyinfo=share->keyinfo+sort_param->key;
  share->state.key_root[sort_param->key]=HA_OFFSET_ERROR;
  /* writing lastkey in first-level tree */
  return error ? error :
                 sort_insert_key(sort_param,sort_info->key_block,
                                 maria_ft_buf->lastkey,HA_OFFSET_ERROR);
}

unknown's avatar
unknown committed
5046 5047

static int sort_maria_ft_key_write(MARIA_SORT_PARAM *sort_param,
unknown's avatar
unknown committed
5048
                                   const uchar *a)
5049 5050 5051 5052 5053
{
  uint a_len, val_off, val_len, error;
  MARIA_SORT_INFO *sort_info= sort_param->sort_info;
  SORT_FT_BUF *ft_buf= sort_info->ft_buf;
  SORT_KEY_BLOCKS *key_block= sort_info->key_block;
5054
  MARIA_SHARE *share= sort_info->info->s;
5055

5056
  val_len=HA_FT_WLEN+share->base.rec_reflength;
5057 5058 5059 5060 5061 5062 5063 5064
  get_key_full_length_rdonly(a_len, (uchar *)a);

  if (!ft_buf)
  {
    /*
      use two-level tree only if key_reflength fits in rec_reflength place
      and row format is NOT static - for _ma_dpointer not to garble offsets
     */
5065 5066 5067
    if ((share->base.key_reflength <=
         share->base.rec_reflength) &&
        (share->options &
5068 5069 5070 5071 5072 5073 5074 5075 5076 5077 5078 5079 5080 5081 5082 5083
          (HA_OPTION_PACK_RECORD | HA_OPTION_COMPRESS_RECORD)))
      ft_buf= (SORT_FT_BUF *)my_malloc(sort_param->keyinfo->block_length +
                                       sizeof(SORT_FT_BUF), MYF(MY_WME));

    if (!ft_buf)
    {
      sort_param->key_write=sort_key_write;
      return sort_key_write(sort_param, a);
    }
    sort_info->ft_buf= ft_buf;
    goto word_init_ft_buf;              /* no need to duplicate the code */
  }
  get_key_full_length_rdonly(val_off, ft_buf->lastkey);

  if (ha_compare_text(sort_param->seg->charset,
                      ((uchar *)a)+1,a_len-1,
unknown's avatar
unknown committed
5084
                      (uchar*) ft_buf->lastkey+1,val_off-1, 0, 0)==0)
5085
  {
unknown's avatar
unknown committed
5086
    uchar *p;
5087 5088 5089 5090
    if (!ft_buf->buf)                   /* store in second-level tree */
    {
      ft_buf->count++;
      return sort_insert_key(sort_param,key_block,
unknown's avatar
unknown committed
5091
                             a + a_len, HA_OFFSET_ERROR);
5092 5093 5094 5095 5096 5097 5098 5099 5100 5101 5102 5103 5104 5105
    }

    /* storing the key in the buffer. */
    memcpy (ft_buf->buf, (char *)a+a_len, val_len);
    ft_buf->buf+=val_len;
    if (ft_buf->buf < ft_buf->end)
      return 0;

    /* converting to two-level tree */
    p=ft_buf->lastkey+val_off;

    while (key_block->inited)
      key_block++;
    sort_info->key_block=key_block;
5106
    sort_param->keyinfo= &share->ft2_keyinfo;
5107 5108 5109 5110 5111 5112 5113 5114 5115 5116 5117 5118 5119 5120 5121 5122 5123 5124 5125 5126 5127 5128 5129 5130 5131 5132 5133 5134 5135 5136 5137 5138 5139
    ft_buf->count=(ft_buf->buf - p)/val_len;

    /* flushing buffer to second-level tree */
    for (error=0; !error && p < ft_buf->buf; p+= val_len)
      error=sort_insert_key(sort_param,key_block,p,HA_OFFSET_ERROR);
    ft_buf->buf=0;
    return error;
  }

  /* flushing buffer */
  if ((error=_ma_sort_ft_buf_flush(sort_param)))
    return error;

word_init_ft_buf:
  a_len+=val_len;
  memcpy(ft_buf->lastkey, a, a_len);
  ft_buf->buf=ft_buf->lastkey+a_len;
  /*
    32 is just a safety margin here
    (at least max(val_len, sizeof(nod_flag)) should be there).
    May be better performance could be achieved if we'd put
      (sort_info->keyinfo->block_length-32)/XXX
      instead.
        TODO: benchmark the best value for XXX.
  */
  ft_buf->end= ft_buf->lastkey+ (sort_param->keyinfo->block_length-32);
  return 0;
} /* sort_maria_ft_key_write */


	/* get pointer to record from a key */

static my_off_t get_record_for_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
unknown's avatar
unknown committed
5140
				   const uchar *key)
5141
{
unknown's avatar
unknown committed
5142
  return _ma_dpos(info,0, key + _ma_keylength(keyinfo, key));
5143 5144 5145 5146 5147 5148
} /* get_record_for_key */


	/* Insert a key in sort-key-blocks */

static int sort_insert_key(MARIA_SORT_PARAM *sort_param,
unknown's avatar
unknown committed
5149
			   register SORT_KEY_BLOCKS *key_block,
unknown's avatar
unknown committed
5150
                           const uchar *key,
5151 5152 5153 5154
			   my_off_t prev_block)
{
  uint a_length,t_length,nod_flag;
  my_off_t filepos,key_file_length;
unknown's avatar
unknown committed
5155
  uchar *anc_buff,*lastkey;
5156 5157 5158 5159
  MARIA_KEY_PARAM s_temp;
  MARIA_KEYDEF *keyinfo=sort_param->keyinfo;
  MARIA_SORT_INFO *sort_info= sort_param->sort_info;
  HA_CHECK *param=sort_info->param;
5160
  MARIA_PINNED_PAGE tmp_page_link, *page_link= &tmp_page_link;
5161 5162
  MARIA_HA *info= sort_info->info;
  MARIA_SHARE *share= info->s;
5163 5164
  DBUG_ENTER("sort_insert_key");

unknown's avatar
unknown committed
5165
  anc_buff= key_block->buff;
5166 5167
  lastkey=key_block->lastkey;
  nod_flag= (key_block == sort_info->key_block ? 0 :
5168
	     share->base.key_reflength);
5169 5170 5171 5172 5173 5174 5175 5176 5177

  if (!key_block->inited)
  {
    key_block->inited=1;
    if (key_block == sort_info->key_block_end)
    {
      _ma_check_print_error(param,"To many key-block-levels; Try increasing sort_key_blocks");
      DBUG_RETURN(1);
    }
5178 5179 5180 5181 5182
    a_length= share->keypage_header + nod_flag;
    key_block->end_pos= anc_buff + share->keypage_header;
    bzero(anc_buff, share->keypage_header);
    _ma_store_keynr(share, anc_buff, (uint) (sort_param->keyinfo -
                                            share->keyinfo));
5183 5184 5185
    lastkey=0;					/* No previous key in block */
  }
  else
5186
    a_length= _ma_get_page_used(share, anc_buff);
5187 5188 5189

	/* Save pointer to previous block */
  if (nod_flag)
5190
  {
5191
    _ma_store_keypage_flag(share, anc_buff, KEYPAGE_FLAG_ISNOD);
5192
    _ma_kpointer(info,key_block->end_pos,prev_block);
5193
  }
5194 5195

  t_length=(*keyinfo->pack_key)(keyinfo,nod_flag,
unknown's avatar
unknown committed
5196
				(uchar*) 0,lastkey,lastkey,key,
5197 5198 5199
				 &s_temp);
  (*keyinfo->store_key)(keyinfo, key_block->end_pos+nod_flag,&s_temp);
  a_length+=t_length;
5200
  _ma_store_page_used(share, anc_buff, a_length);
5201
  key_block->end_pos+=t_length;
unknown's avatar
unknown committed
5202
  if (a_length <= (uint) (keyinfo->block_length - KEYPAGE_CHECKSUM_SIZE))
5203
  {
unknown's avatar
unknown committed
5204
    VOID(_ma_move_key(keyinfo, key_block->lastkey, key));
5205 5206 5207 5208
    key_block->last_length=a_length-t_length;
    DBUG_RETURN(0);
  }

5209
  /* Fill block with end-zero and write filled block */
5210
  _ma_store_page_used(share, anc_buff, key_block->last_length);
unknown's avatar
unknown committed
5211
  bzero(anc_buff+key_block->last_length,
5212 5213
	keyinfo->block_length- key_block->last_length);
  key_file_length=info->state->key_file_length;
5214
  if ((filepos= _ma_new(info, DFLT_INIT_HITS, &page_link)) == HA_OFFSET_ERROR)
5215
    DBUG_RETURN(1);
5216
  _ma_fast_unlock_key_del(info);
5217 5218

  /* If we read the page from the key cache, we have to write it back to it */
5219
  if (page_link->changed)
5220
  {
5221 5222 5223 5224
    pop_dynamic(&info->pinned_pages);
    if (_ma_write_keypage(info, keyinfo, filepos,
                          PAGECACHE_LOCK_WRITE_UNLOCK,
                          DFLT_INIT_HITS, anc_buff))
5225 5226
      DBUG_RETURN(1);
  }
unknown's avatar
unknown committed
5227 5228 5229 5230 5231 5232 5233
  else
  {
    put_crc(anc_buff, filepos, share);
    if (my_pwrite(share->kfile.file, anc_buff,
                  (uint) keyinfo->block_length, filepos, param->myf_rw))
      DBUG_RETURN(1);
  }
5234
  DBUG_DUMP("buff", anc_buff, _ma_get_page_used(share, anc_buff));
5235 5236 5237 5238 5239 5240 5241 5242 5243 5244 5245

	/* Write separator-key to block in next level */
  if (sort_insert_key(sort_param,key_block+1,key_block->lastkey,filepos))
    DBUG_RETURN(1);

	/* clear old block and write new key in it */
  key_block->inited=0;
  DBUG_RETURN(sort_insert_key(sort_param, key_block,key,prev_block));
} /* sort_insert_key */


5246
/* Delete record when we found a duplicated key */
5247 5248 5249 5250 5251

/*
  Remove the row that caused a duplicate key, together with its keys.

  SYNOPSIS
    sort_delete_record()
    sort_param          Sort parameters

  DESCRIPTION
    The row at sort_info->info->cur_row.lastpos is read through
    sort_info->new_info. Its keys for all indexes below
    sort_info->current_key are deleted, its checksum is subtracted from
    param->glob_crc if sort_param->calc_checksum is set, and the row is
    deleted.

    The function refuses to run for a quick repair without
    T_FORCE_UNIQUENESS and for compressed tables.

    While the row is handled, row_info->dfile.file is switched to the
    file of the record cache; it is restored on every path that returns
    after the switch.

  RETURN
    0      ok
    != 0   error
*/
static int sort_delete_record(MARIA_SORT_PARAM *sort_param)
{
  uint i;
  int old_file,error;
  uchar *key;
  MARIA_SORT_INFO *sort_info=sort_param->sort_info;
  HA_CHECK *param=sort_info->param;
  MARIA_HA *row_info= sort_info->new_info, *key_info= sort_info->info;
  DBUG_ENTER("sort_delete_record");

  if ((param->testflag & (T_FORCE_UNIQUENESS|T_QUICK)) == T_QUICK)
  {
    _ma_check_print_error(param,
                         "Quick-recover aborted; Run recovery without switch -q or with "
                          "switch -qq");
    DBUG_RETURN(1);
  }
  if (key_info->s->options & HA_OPTION_COMPRESS_RECORD)
  {
    _ma_check_print_error(param,
                          "Recover aborted; Can't run standard recovery on "
                          "compressed tables with errors in data-file. "
                          "Use 'maria_chk --safe-recover' to fix it");
    DBUG_RETURN(1);
  }

  old_file= row_info->dfile.file;
  /* This only affects static and dynamic row formats */
  row_info->dfile.file= row_info->rec_cache.file;
  if (flush_io_cache(&row_info->rec_cache))
  {
    /* Don't leave the handler pointing at the record cache file */
    row_info->dfile.file= old_file;
    DBUG_RETURN(1);
  }

  /* Build the keys in the second half of the key buffer */
  key= key_info->lastkey + key_info->s->base.max_key_length;
  if ((error=(*row_info->s->read_record)(row_info, sort_param->record,
                                         key_info->cur_row.lastpos)) &&
      error != HA_ERR_RECORD_DELETED)
  {
    _ma_check_print_error(param,"Can't read record to be removed");
    row_info->dfile.file= old_file;
    DBUG_RETURN(1);
  }
  row_info->cur_row.lastpos= key_info->cur_row.lastpos;

  for (i=0 ; i < sort_info->current_key ; i++)
  {
    uint key_length= _ma_make_key(key_info, i, key, sort_param->record,
                                  key_info->cur_row.lastpos);
    if (_ma_ck_delete(key_info, i, key, key_length))
    {
      _ma_check_print_error(param,
                            "Can't delete key %d from record to be removed",
                            i+1);
      row_info->dfile.file= old_file;
      DBUG_RETURN(1);
    }
  }
  if (sort_param->calc_checksum)
    param->glob_crc-=(*key_info->s->calc_check_checksum)(key_info,
                                                         sort_param->record);
  error= (*row_info->s->delete_record)(row_info, sort_param->record);
  if (error)
    _ma_check_print_error(param,"Got error %d when deleting record",
                          my_errno);
  row_info->dfile.file= old_file;           /* restore actual value */
  row_info->state->records--;
  DBUG_RETURN(error);
} /* sort_delete_record */

5316 5317

/* Fix all pending blocks and flush everything to disk */
5318 5319 5320 5321 5322 5323 5324 5325 5326 5327

int _ma_flush_pending_blocks(MARIA_SORT_PARAM *sort_param)
{
  uint nod_flag,length;
  my_off_t filepos,key_file_length;
  SORT_KEY_BLOCKS *key_block;
  MARIA_SORT_INFO *sort_info= sort_param->sort_info;
  myf myf_rw=sort_info->param->myf_rw;
  MARIA_HA *info=sort_info->info;
  MARIA_KEYDEF *keyinfo=sort_param->keyinfo;
5328
  MARIA_PINNED_PAGE tmp_page_link, *page_link= &tmp_page_link;
5329 5330 5331 5332 5333 5334 5335
  DBUG_ENTER("_ma_flush_pending_blocks");

  filepos= HA_OFFSET_ERROR;			/* if empty file */
  nod_flag=0;
  for (key_block=sort_info->key_block ; key_block->inited ; key_block++)
  {
    key_block->inited=0;
5336
    length= _ma_get_page_used(info->s, key_block->buff);
5337 5338 5339
    if (nod_flag)
      _ma_kpointer(info,key_block->end_pos,filepos);
    key_file_length=info->state->key_file_length;
unknown's avatar
unknown committed
5340
    bzero(key_block->buff+length, keyinfo->block_length-length);
5341 5342
    if ((filepos= _ma_new(info, DFLT_INIT_HITS, &page_link)) ==
        HA_OFFSET_ERROR)
5343
      goto err;
5344 5345

    /* If we read the page from the key cache, we have to write it back */
5346
    if (page_link->changed)
5347
    {
5348
      pop_dynamic(&info->pinned_pages);
5349
      if (_ma_write_keypage(info, keyinfo, filepos,
5350
                            PAGECACHE_LOCK_WRITE_UNLOCK,
5351
                            DFLT_INIT_HITS, key_block->buff))
5352
	goto err;
5353
    }
unknown's avatar
unknown committed
5354 5355 5356 5357 5358
    else
    {
      put_crc(key_block->buff, filepos, info->s);
      if (my_pwrite(info->s->kfile.file, key_block->buff,
                    (uint) keyinfo->block_length,filepos, myf_rw))
5359
        goto err;
unknown's avatar
unknown committed
5360
    }
unknown's avatar
unknown committed
5361
    DBUG_DUMP("buff",key_block->buff,length);
5362 5363 5364
    nod_flag=1;
  }
  info->s->state.key_root[sort_param->key]=filepos; /* Last is root for tree */
5365
  _ma_fast_unlock_key_del(info);
5366
  DBUG_RETURN(0);
5367 5368 5369 5370

err:
  _ma_fast_unlock_key_del(info);
  DBUG_RETURN(1);
5371 5372 5373 5374 5375 5376 5377 5378 5379 5380 5381
} /* _ma_flush_pending_blocks */

	/* alloc space and pointers for key_blocks */

/*
  Allocate 'blocks' SORT_KEY_BLOCKS descriptors together with their buffers

  NOTES
    Everything is allocated with a single my_malloc() call: first the
    array of 'blocks' descriptors, then one buffer of
    (buffer_length + IO_SIZE) bytes for each descriptor. Every descriptor
    is marked as not inited and its 'buff' member is pointed at its own
    buffer.

  RETURN
    Pointer to the first descriptor
    0 if the memory could not be allocated; an error has then been
      reported through _ma_check_print_error()
*/

static SORT_KEY_BLOCKS *alloc_key_blocks(HA_CHECK *param, uint blocks,
                                         uint buffer_length)
{
  reg1 uint i;
  SORT_KEY_BLOCKS *block;
  DBUG_ENTER("alloc_key_blocks");

  if (!(block= (SORT_KEY_BLOCKS*) my_malloc((sizeof(SORT_KEY_BLOCKS)+
                                             buffer_length+IO_SIZE)*blocks,
                                            MYF(0))))
  {
    _ma_check_print_error(param,"Not enough memory for sort-key-blocks");
    /* Must leave through DBUG_RETURN as DBUG_ENTER was used above */
    DBUG_RETURN(0);
  }
  for (i=0 ; i < blocks ; i++)
  {
    block[i].inited=0;
    /* The buffers start directly after the last descriptor */
    block[i].buff= (uchar*) (block+blocks)+(buffer_length+IO_SIZE)*i;
  }
  DBUG_RETURN(block);
} /* alloc_key_blocks */


	/* Check if file is almost full */

/*
  RETURN
    0   Table has HA_OPTION_COMPRESS_RECORD set, or neither file is
        close to its limit
    1   The end position of the index file, divided by 10 and multiplied
        by 9, is bigger than base.max_key_file_length, or the same
        holds for the data file and base.max_data_file_length
*/

int maria_test_if_almost_full(MARIA_HA *info)
{
  MARIA_SHARE *share= info->s;

  if (share->options & HA_OPTION_COMPRESS_RECORD)
    return 0;

  /* Index file first; the data file is only examined if this is false */
  if (my_seek(share->kfile.file, 0L, MY_SEEK_END,
              MYF(MY_THREADSAFE)) / 10 * 9 >
      (my_off_t) share->base.max_key_file_length)
    return 1;

  return (my_seek(info->dfile.file, 0L, MY_SEEK_END, MYF(0)) / 10 * 9 >
          (my_off_t) share->base.max_data_file_length);
}

	/* Recreate table with more space allocated for record data */

/*
  SYNOPSIS
    maria_recreate_table()
    param       Check parameters; testflag, language and
                max_data_file_length are used
    org_info    IN:  Open handler for the table. It is closed here.
                OUT: Handler returned by maria_open() on the re-created
                     table (NULL if that open failed)
    filename    Name given to maria_create() and maria_open()

  NOTES
    The key, key segment, column and unique definitions of the old share
    are copied to temporary buffers before the old handler is closed, as
    they are needed as arguments to maria_create(), which is called with
    HA_DONT_TOUCH_DATA.

    After the table has been reopened, the record count, checksum,
    deleted/empty counters, delete link, unique counter and data file
    length saved from the old handler are stored in the new handler and
    written with maria_update_state_info().

    If maria_create() fails, *org_info is left untouched although
    maria_close() has already been called on it.

  RETURN
    0   ok
    1   error
*/

int maria_recreate_table(HA_CHECK *param, MARIA_HA **org_info, char *filename)
{
  int error;
  MARIA_HA info;
  MARIA_SHARE share;
  MARIA_KEYDEF *keyinfo,*key,*key_end;
  HA_KEYSEG *keysegs,*keyseg;
  MARIA_COLUMNDEF *columndef,*column,*end;
  MARIA_UNIQUEDEF *uniquedef,*u_ptr,*u_end;
  MARIA_STATUS_INFO status_info;
  uint unpack,key_parts;
  ha_rows max_records;
  ulonglong file_length,tmp_length;
  MARIA_CREATE_INFO create_info;
  DBUG_ENTER("maria_recreate_table");

  error=1;					/* Default error */
  /* Work on private copies as the original handler is closed below */
  info= **org_info;
  status_info= (*org_info)->state[0];
  info.state= &status_info;
  share= *(*org_info)->s;
  unpack= (share.options & HA_OPTION_COMPRESS_RECORD) &&
    (param->testflag & T_UNPACK);
  if (!(keyinfo=(MARIA_KEYDEF*) my_alloca(sizeof(MARIA_KEYDEF) *
                                          share.base.keys)))
    DBUG_RETURN(1);                             /* Out of memory is an error */
  memcpy((uchar*) keyinfo,(uchar*) share.keyinfo,
         (size_t) (sizeof(MARIA_KEYDEF)*share.base.keys));

  key_parts= share.base.all_key_parts;
  /*
    The buffer must have room for everything that is copied from
    share.keyparts below, which includes one element per unique
    definition.
  */
  if (!(keysegs=(HA_KEYSEG*) my_alloca(sizeof(HA_KEYSEG)*
                                       (key_parts+share.base.keys+
                                        share.state.header.uniques))))
  {
    my_afree((uchar*) keyinfo);
    DBUG_RETURN(1);
  }
  if (!(columndef=(MARIA_COLUMNDEF*)
        my_alloca(sizeof(MARIA_COLUMNDEF)*(share.base.fields+1))))
  {
    my_afree((uchar*) keyinfo);
    my_afree((uchar*) keysegs);
    DBUG_RETURN(1);
  }
  if (!(uniquedef=(MARIA_UNIQUEDEF*)
        my_alloca(sizeof(MARIA_UNIQUEDEF)*(share.state.header.uniques+1))))
  {
    my_afree((uchar*) columndef);
    my_afree((uchar*) keyinfo);
    my_afree((uchar*) keysegs);
    DBUG_RETURN(1);
  }

  /* Copy the column definitions */
  memcpy((uchar*) columndef,(uchar*) share.columndef,
         (size_t) (sizeof(MARIA_COLUMNDEF)*(share.base.fields+1)));
  for (column=columndef, end= columndef+share.base.fields;
       column != end ;
       column++)
  {
    /*
      When unpacking to a format without HA_OPTION_PACK_RECORD, all
      columns except blob, varchar and check columns become FIELD_NORMAL
    */
    if (unpack && !(share.options & HA_OPTION_PACK_RECORD) &&
        column->type != FIELD_BLOB &&
        column->type != FIELD_VARCHAR &&
        column->type != FIELD_CHECK)
      column->type=(int) FIELD_NORMAL;
  }

  /* Change the new key to point at the saved key segments */
  memcpy((uchar*) keysegs,(uchar*) share.keyparts,
         (size_t) (sizeof(HA_KEYSEG)*(key_parts+share.base.keys+
                                      share.state.header.uniques)));
  keyseg=keysegs;
  for (key=keyinfo,key_end=keyinfo+share.base.keys; key != key_end ; key++)
  {
    key->seg=keyseg;
    for (; keyseg->type ; keyseg++)
    {
      if (param->language)
        keyseg->language=param->language;	/* change language */
    }
    keyseg++;					/* Skip end pointer */
  }

  /*
    Copy the unique definitions and change them to point at the new key
    segments
  */
  memcpy((uchar*) uniquedef,(uchar*) share.uniqueinfo,
         (size_t) (sizeof(MARIA_UNIQUEDEF)*(share.state.header.uniques)));
  for (u_ptr=uniquedef,u_end=uniquedef+share.state.header.uniques;
       u_ptr != u_end ; u_ptr++)
  {
    u_ptr->seg=keyseg;
    keyseg+=u_ptr->keysegs+1;
  }

  /* Estimate the number of rows the new table should have room for */
  if (share.options & HA_OPTION_COMPRESS_RECORD)
    share.base.records=max_records=info.state->records;
  else if (share.base.min_pack_length)
    max_records=(ha_rows) (my_seek(info.dfile.file, 0L, MY_SEEK_END,
                                   MYF(0)) /
                           (ulong) share.base.min_pack_length);
  else
    max_records=0;
  unpack= (share.data_file_type == COMPRESSED_RECORD) &&
    (param->testflag & T_UNPACK);
  share.options&= ~HA_OPTION_TEMP_COMPRESS_RECORD;

  /*
    Use the biggest of: current data file length + 10 %,
    param->max_data_file_length and the old max_data_file_length
  */
  file_length=(ulonglong) my_seek(info.dfile.file, 0L, MY_SEEK_END, MYF(0));
  tmp_length= file_length+file_length/10;
  set_if_bigger(file_length,param->max_data_file_length);
  set_if_bigger(file_length,tmp_length);
  set_if_bigger(file_length,(ulonglong) share.base.max_data_file_length);

  VOID(maria_close(*org_info));
  bzero((char*) &create_info,sizeof(create_info));
  create_info.max_rows=max(max_records,share.base.records);
  create_info.reloc_rows=share.base.reloc;
  create_info.old_options=(share.options |
                           (unpack ? HA_OPTION_TEMP_COMPRESS_RECORD : 0));

  create_info.data_file_length=file_length;
  create_info.auto_increment=share.state.auto_increment;
  create_info.language = (param->language ? param->language :
                          share.state.header.language);
  create_info.key_file_length=  status_info.key_file_length;
  create_info.org_data_file_type= ((enum data_file_type)
                                   share.state.header.org_data_file_type);

  /*
    Allow for creating an auto_increment key. This has an effect only if
    an auto_increment key exists in the original table.
  */
  create_info.with_auto_increment= TRUE;
  create_info.null_bytes= share.base.null_bytes;
  /*
    We don't have to handle symlinks here because we are using
    HA_DONT_TOUCH_DATA
  */
  if (maria_create(filename, share.data_file_type,
                   share.base.keys - share.state.header.uniques,
                   keyinfo, share.base.fields, columndef,
                   share.state.header.uniques, uniquedef,
                   &create_info,
                   HA_DONT_TOUCH_DATA))
  {
    _ma_check_print_error(param,
                          "Got error %d when trying to recreate indexfile",
                          my_errno);
    goto end;
  }
  *org_info=maria_open(filename,O_RDWR,
                    (param->testflag & T_WAIT_FOREVER) ? HA_OPEN_WAIT_IF_LOCKED :
                    (param->testflag & T_DESCRIPT) ? HA_OPEN_IGNORE_IF_LOCKED :
                    HA_OPEN_ABORT_IF_LOCKED);
  if (!*org_info)
  {
    _ma_check_print_error(param,
                          "Got error %d when trying to open re-created indexfile",
                          my_errno);
    goto end;
  }
  /* We are modifying */
  (*org_info)->s->options&= ~HA_OPTION_READ_ONLY_DATA;
  VOID(_ma_readinfo(*org_info,F_WRLCK,0));
  /* Carry over the state saved from the old handler */
  (*org_info)->state->records=info.state->records;
  if (share.state.create_time)
    (*org_info)->s->state.create_time=share.state.create_time;
  (*org_info)->s->state.unique=(*org_info)->this_unique=
    share.state.unique;
  (*org_info)->state->checksum=info.state->checksum;
  (*org_info)->state->del=info.state->del;
  (*org_info)->s->state.dellink=share.state.dellink;
  (*org_info)->state->empty=info.state->empty;
  (*org_info)->state->data_file_length=info.state->data_file_length;
  if (maria_update_state_info(param,*org_info,UPDATE_TIME | UPDATE_STAT |
                              UPDATE_OPEN_COUNT))
    goto end;
  error=0;
end:
  my_afree((uchar*) uniquedef);
  my_afree((uchar*) keyinfo);
  my_afree((uchar*) columndef);
  my_afree((uchar*) keysegs);
  DBUG_RETURN(error);
}


	/* write suffix to data file if needed */

/*
  NOTES
    Only does something when the new handler (sort_info->new_info) has
    data file type COMPRESSED_RECORD and fix_datafile is set. In that
    case MEMMAP_EXTRA_MARGIN zero bytes are written to the record cache
    of the new handler and param->read_cache.end_of_file is increased
    with the same amount.

  RETURN
    0   ok (also when there was nothing to do)
    1   could not write to the record cache
*/

int maria_write_data_suffix(MARIA_SORT_INFO *sort_info, my_bool fix_datafile)
{
  MARIA_HA *info= sort_info->new_info;
  uchar zero_fill[MEMMAP_EXTRA_MARGIN];

  if (info->s->data_file_type != COMPRESSED_RECORD || !fix_datafile)
    return 0;                                   /* No suffix wanted */

  bzero(zero_fill, sizeof(zero_fill));
  if (my_b_write(&info->rec_cache, zero_fill, sizeof(zero_fill)))
  {
    _ma_check_print_error(sort_info->param,
                          "%d when writing to datafile", my_errno);
    return 1;
  }
  sort_info->param->read_cache.end_of_file+= sizeof(zero_fill);
  return 0;
}

unknown's avatar
unknown committed
5622 5623

/* Update state and maria_chk time of indexfile */
5624 5625 5626

int maria_update_state_info(HA_CHECK *param, MARIA_HA *info,uint update)
{
5627
  MARIA_SHARE *share= info->s;
5628
  DBUG_ENTER("maria_update_state_info");
5629 5630 5631 5632 5633 5634 5635 5636 5637

  if (update & UPDATE_OPEN_COUNT)
  {
    share->state.open_count=0;
    share->global_changed=0;
  }
  if (update & UPDATE_STAT)
  {
    uint i, key_parts= mi_uint2korr(share->state.header.key_parts);
5638
    share->state.records_at_analyze= info->state->records;
5639 5640 5641 5642 5643
    share->state.changed&= ~STATE_NOT_ANALYZED;
    if (info->state->records)
    {
      for (i=0; i<key_parts; i++)
      {
5644
        if (!(share->state.rec_per_key_part[i]=param->new_rec_per_key_part[i]))
5645 5646 5647 5648 5649 5650 5651 5652 5653 5654 5655 5656 5657 5658 5659 5660 5661 5662 5663 5664
          share->state.changed|= STATE_NOT_ANALYZED;
      }
    }
  }
  if (update & (UPDATE_STAT | UPDATE_SORT | UPDATE_TIME | UPDATE_AUTO_INC))
  {
    if (update & UPDATE_TIME)
    {
      share->state.check_time= (long) time((time_t*) 0);
      if (!share->state.create_time)
	share->state.create_time=share->state.check_time;
    }
    /*
      When tables are locked we haven't synched the share state and the
      real state for a while so we better do it here before synching
      the share state to disk. Only when table is write locked is it
      necessary to perform this synch.
    */
    if (info->lock_type == F_WRLCK)
      share->state.state= *info->state;
unknown's avatar
unknown committed
5665
    if (_ma_state_info_write(share, 1|2))
5666 5667 5668 5669 5670 5671 5672 5673 5674 5675 5676 5677
      goto err;
    share->changed=0;
  }
  {						/* Force update of status */
    int error;
    uint r_locks=share->r_locks,w_locks=share->w_locks;
    share->r_locks= share->w_locks= share->tot_locks= 0;
    error= _ma_writeinfo(info,WRITEINFO_NO_UNLOCK);
    share->r_locks=r_locks;
    share->w_locks=w_locks;
    share->tot_locks=r_locks+w_locks;
    if (!error)
5678
      DBUG_RETURN(0);
5679 5680 5681
  }
err:
  _ma_check_print_error(param,"%d when updating keyfile",my_errno);
5682
  DBUG_RETURN(1);
5683 5684
}

5685 5686 5687 5688 5689 5690 5691 5692 5693 5694 5695 5696
/*
  Update auto increment value for a table
  When setting the 'repair_only' flag we only want to change the
  old auto_increment value if its wrong (smaller than some given key).
  The reason is that we shouldn't change the auto_increment value
  for a table without good reason when only doing a repair; If the
  user have inserted and deleted rows, the auto_increment value
  may be bigger than the biggest current row and this is ok.

  If repair_only is not set, we will update the flag to the value in
  param->auto_increment is bigger than the biggest key.
*/
5697 5698 5699 5700

void _ma_update_auto_increment_key(HA_CHECK *param, MARIA_HA *info,
                                   my_bool repair_only)
{
5701
  MARIA_SHARE *share= info->s;
unknown's avatar
unknown committed
5702
  uchar *record;
5703 5704
  DBUG_ENTER("update_auto_increment_key");

5705 5706
  if (!share->base.auto_key ||
      ! maria_is_key_active(share->state.key_map, share->base.auto_key - 1))
5707 5708 5709 5710 5711
  {
    if (!(param->testflag & T_VERY_SILENT))
      _ma_check_print_info(param,
			  "Table: %s doesn't have an auto increment key\n",
			  param->isam_file_name);
5712
    DBUG_VOID_RETURN;
5713 5714 5715 5716 5717 5718 5719 5720
  }
  if (!(param->testflag & T_SILENT) &&
      !(param->testflag & T_REP))
    printf("Updating MARIA file: %s\n", param->isam_file_name);
  /*
    We have to use an allocated buffer instead of info->rec_buff as
    _ma_put_key_in_record() may use info->rec_buff
  */
5721
  if (!(record= (uchar*) my_malloc((uint) share->base.pack_reclength,
5722 5723 5724
				  MYF(0))))
  {
    _ma_check_print_error(param,"Not enough memory for extra record");
5725
    DBUG_VOID_RETURN;
5726 5727 5728
  }

  maria_extra(info,HA_EXTRA_KEYREAD,0);
5729
  if (maria_rlast(info, record, share->base.auto_key-1))
5730 5731 5732 5733 5734 5735
  {
    if (my_errno != HA_ERR_END_OF_FILE)
    {
      maria_extra(info,HA_EXTRA_NO_KEYREAD,0);
      my_free((char*) record, MYF(0));
      _ma_check_print_error(param,"%d when reading last record",my_errno);
5736
      DBUG_VOID_RETURN;
5737 5738
    }
    if (!repair_only)
5739
      share->state.auto_increment=param->auto_increment_value;
5740 5741 5742
  }
  else
  {
5743 5744 5745
    const HA_KEYSEG *keyseg= share->keyinfo[share->base.auto_key-1].seg;
    ulonglong auto_increment=
      ma_retrieve_auto_increment(record + keyseg->start, keyseg->type);
5746
    set_if_bigger(share->state.auto_increment,auto_increment);
5747
    if (!repair_only)
5748
      set_if_bigger(share->state.auto_increment, param->auto_increment_value);
5749 5750 5751 5752
  }
  maria_extra(info,HA_EXTRA_NO_KEYREAD,0);
  my_free((char*) record, MYF(0));
  maria_update_state_info(param, info, UPDATE_AUTO_INC);
5753
  DBUG_VOID_RETURN;
5754 5755 5756 5757 5758 5759 5760 5761 5762 5763 5764 5765 5766 5767 5768 5769 5770 5771 5772 5773 5774 5775 5776 5777 5778 5779 5780 5781 5782 5783 5784 5785 5786 5787 5788 5789 5790 5791 5792 5793 5794 5795 5796 5797 5798 5799 5800 5801 5802 5803 5804 5805 5806 5807
}


/*
  Update statistics for each part of an index

  SYNOPSIS
    maria_update_key_parts()
      keyinfo           IN  Index information (only key->keysegs used)
      rec_per_key_part  OUT Store statistics here
      unique            IN  Array of (#distinct tuples)
      notnull_tuples    IN  Array of (#tuples), or NULL
      records               Number of records in the table

  DESCRIPTION
    This function is called produce index statistics values from unique and
    notnull_tuples arrays after these arrays were produced with sequential
    index scan (the scan is done in two places: chk_index() and
    sort_key_write()).

    This function handles all 3 index statistics collection methods.

    Unique is an array:
      unique[0]= (#different values of {keypart1}) - 1
      unique[1]= (#different values of {keypart1,keypart2} tuple)-unique[0]-1
      ...

    For MI_STATS_METHOD_IGNORE_NULLS method, notnull_tuples is an array too:
      notnull_tuples[0]= (#of {keypart1} tuples such that keypart1 is not NULL)
      notnull_tuples[1]= (#of {keypart1,keypart2} tuples such that all
                          keypart{i} are not NULL)
      ...
    For all other statistics collection methods notnull_tuples==NULL.

    Output is an array:
    rec_per_key_part[k] =
     = E(#records in the table such that keypart_1=c_1 AND ... AND
         keypart_k=c_k for arbitrary constants c_1 ... c_k)

     = {assuming that values have uniform distribution and index contains all
        tuples from the domain (or that {c_1, ..., c_k} tuple is choosen from
        index tuples}

     = #tuples-in-the-index / #distinct-tuples-in-the-index.

    The #tuples-in-the-index and #distinct-tuples-in-the-index have different
    meaning depending on which statistics collection method is used:

    MI_STATS_METHOD_*  how are nulls compared?  which tuples are counted?
     NULLS_EQUAL            NULL == NULL           all tuples in table
     NULLS_NOT_EQUAL        NULL != NULL           all tuples in table
     IGNORE_NULLS               n/a             tuples that don't have NULLs
*/

5808
void maria_update_key_parts(MARIA_KEYDEF *keyinfo, double *rec_per_key_part,
                            ulonglong *unique, ulonglong *notnull,
                            ulonglong records)
{
  ulonglong unique_sum= 0;              /* unique[0] + ... + unique[part] */
  uint part;

  for (part= 0 ; part < keyinfo->keysegs ; part++)
  {
    ulonglong tuples= records;
    ulonglong unique_tuples;
    double estimate;

    unique_sum+= unique[part];
    unique_tuples= unique_sum + 1;
    if (notnull)
    {
      /*
        Only count tuples without NULLs:

        #(unique_tuples not counting tuples with NULLs) =
          #(unique_tuples counting tuples with NULLs as different) -
          #(tuples with NULLs)
      */
      tuples= notnull[part];
      unique_tuples-= (records - notnull[part]);
    }

    if (unique_tuples == 0)
      estimate= 1;
    else if (unique_sum == 0)
      estimate= ulonglong2double(tuples);       /* 1 unique tuple */
    else
      estimate= ulonglong2double(tuples) / ulonglong2double(unique_tuples);

    /*
      The value can be < 1 here for some keys (the original author
      mentions FULLTEXT); never store less than 1
    */
    set_if_bigger(estimate, 1);

    rec_per_key_part[part]= estimate;
  }
}


unknown's avatar
unknown committed
5849
/*
  Calculate a checksum over 'length' bytes starting at 'buf'

  NOTES
    Starting from 0, for each byte the checksum is shifted one bit to the
    left and the byte is added. If the highest bit of the checksum was
    set before the shift, 1 is added too.
*/

static ha_checksum maria_byte_checksum(const uchar *buf, uint length)
{
  const ha_checksum high_bit= ((ha_checksum) 1) << (8*sizeof(ha_checksum)-1);
  ha_checksum crc= 0;
  uint pos;

  for (pos= 0 ; pos < length ; pos++)
  {
    /* Has to be taken from the value before the shift */
    ha_checksum carry= test(crc & high_bit);
    crc= ((crc << 1) + buf[pos]) + carry;
  }
  return crc;
}

/*
  Check if a key is regarded as too big to be created by sorting

  RETURN
    1   Key has HA_SPATIAL set, or it has one of HA_BINARY_PACK_KEY,
        HA_VAR_LENGTH_KEY or HA_FULLTEXT set and rows * (max key length)
        is bigger than maria_max_temp_length
    0   Otherwise
*/

static my_bool maria_too_big_key_for_sort(MARIA_KEYDEF *key, ha_rows rows)
{
  uint key_maxlength= key->maxlength;

  if (key->flag & HA_FULLTEXT)
  {
    /* Replace HA_FT_MAXBYTELEN with the word length used when sorting */
    uint ft_max_word_len_for_sort= FT_MAX_WORD_LEN_FOR_SORT*
                                   key->seg->charset->mbmaxlen;
    key_maxlength+= ft_max_word_len_for_sort-HA_FT_MAXBYTELEN;
  }

  if (key->flag & HA_SPATIAL)
    return 1;
  if (!(key->flag & (HA_BINARY_PACK_KEY | HA_VAR_LENGTH_KEY | HA_FULLTEXT)))
    return 0;
  return ((ulonglong) rows * key_maxlength >
          (ulonglong) maria_max_temp_length);
}

/*
  Deactivate all non unique indexes that can be recreated fast

  SYNOPSIS
    maria_disable_non_unique_index()
    info        Maria handler; the table must not contain any records
    rows        Number of rows we expect to put into the file. May be
                zero for the case when we don't know this.

  NOTES
    A key is marked as not active in share->state.key_map if all of the
    following hold:
    - It has none of the flags HA_NOSAME, HA_SPATIAL and HA_AUTO_KEY
    - maria_too_big_key_for_sort() returns false for it
    - It is not the key given by share->base.auto_key
    For every such key HA_STATE_CHANGED is also set in info->update.
*/

void maria_disable_non_unique_index(MARIA_HA *info, ha_rows rows)
{
  MARIA_SHARE *share= info->s;
  uint key_nr;

  DBUG_ASSERT(info->state->records == 0 &&
              (!rows || rows >= MARIA_MIN_ROWS_TO_DISABLE_INDEXES));

  for (key_nr= 0 ; key_nr < share->base.keys ; key_nr++)
  {
    MARIA_KEYDEF *key= share->keyinfo + key_nr;

    if (key->flag & (HA_NOSAME | HA_SPATIAL | HA_AUTO_KEY))
      continue;
    if (maria_too_big_key_for_sort(key, rows))
      continue;
    if (share->base.auto_key == key_nr + 1)
      continue;

    maria_clear_key_active(share->state.key_map, key_nr);
    info->update|= HA_STATE_CHANGED;
  }
}


/*
  Return TRUE if we can use repair by sorting
  One can set the force argument to force to use sorting
  even if the temporary file would be quite big!
*/

my_bool maria_test_if_sort_rep(MARIA_HA *info, ha_rows rows,
5910
                               ulonglong key_map, my_bool force)
5911
{
5912
  MARIA_SHARE *share= info->s;
5913 5914 5915 5916 5917 5918 5919 5920 5921 5922 5923 5924 5925 5926 5927 5928 5929 5930
  MARIA_KEYDEF *key=share->keyinfo;
  uint i;

  /*
    maria_repair_by_sort only works if we have at least one key. If we don't
    have any keys, we should use the normal repair.
  */
  if (! maria_is_any_key_active(key_map))
    return FALSE;				/* Can't use sort */
  for (i=0 ; i < share->base.keys ; i++,key++)
  {
    if (!force && maria_too_big_key_for_sort(key,rows))
      return FALSE;
  }
  return TRUE;
}


5931 5932 5933 5934 5935 5936 5937 5938
/**
   @brief Create a new handle for manipulation the new record file

   @note
   It's ok for Recovery to have two MARIA_SHARE on the same index file
   because the one we create here is not transactional
*/

unknown's avatar
unknown committed
5939
static my_bool create_new_data_handle(MARIA_SORT_PARAM *param, File new_file)
5940 5941 5942 5943 5944 5945 5946 5947 5948 5949 5950 5951
{

  MARIA_SORT_INFO *sort_info= param->sort_info;
  MARIA_HA *info= sort_info->info;
  MARIA_HA *new_info;
  DBUG_ENTER("create_new_data_handle");

  if (!(sort_info->new_info= maria_open(info->s->open_file_name, O_RDWR,
                                        HA_OPEN_COPY | HA_OPEN_FOR_REPAIR)))
    DBUG_RETURN(1);

  new_info= sort_info->new_info;
5952 5953 5954
  _ma_bitmap_set_pagecache_callbacks(&new_info->s->bitmap.file,
                                     new_info->s);
  _ma_set_data_pagecache_callbacks(&new_info->dfile, new_info->s);
5955 5956 5957 5958 5959 5960 5961 5962 5963 5964 5965 5966 5967 5968 5969 5970 5971
  change_data_file_descriptor(new_info, new_file);
  maria_lock_database(new_info, F_EXTRA_LCK);
  if ((sort_info->param->testflag & T_UNPACK) &&
      info->s->data_file_type == COMPRESSED_RECORD)
  {
    (*new_info->s->once_end)(new_info->s);
    (*new_info->s->end)(new_info);
    restore_data_file_type(new_info->s);
    _ma_setup_functions(new_info->s);
    if ((*new_info->s->once_init)(new_info->s, new_file) ||
        (*new_info->s->init)(new_info))
      DBUG_RETURN(1);
  }
  _ma_reset_status(new_info);
  if (_ma_initialize_data_file(new_info->s, new_file))
    DBUG_RETURN(1);

5972
  /* Take into account any bitmap page created above: */
5973 5974 5975 5976 5977 5978 5979 5980 5981
  param->filepos= new_info->state->data_file_length;

  /* Use new virtual functions for key generation */
  info->s->keypos_to_recpos= new_info->s->keypos_to_recpos;
  info->s->recpos_to_keypos= new_info->s->recpos_to_keypos;
  DBUG_RETURN(0);
}


5982 5983 5984 5985 5986 5987 5988
/*
  Decide which data file type the repaired table will get

  NOTES
    sort_info->new_data_file_type is set to the current data file type
    of the share. If that is COMPRESSED_RECORD and T_UNPACK is given,
    it is instead set to the original data file type stored in the
    state header, and share->delete_record is replaced with the function
    that _ma_setup_functions() chooses for a copy of the share that has
    been changed to look like a not compressed table.
*/

static void
set_data_file_type(MARIA_SORT_INFO *sort_info, MARIA_SHARE *share)
{
  sort_info->new_data_file_type= share->data_file_type;
  if (sort_info->new_data_file_type == COMPRESSED_RECORD &&
      (sort_info->param->testflag & T_UNPACK))
  {
    MARIA_SHARE tmp;
    sort_info->new_data_file_type= share->state.header.org_data_file_type;
    /* Set delete_function for sort_delete_record() */
    tmp= *share;
    tmp.state.header.data_file_type= tmp.state.header.org_data_file_type;
    /*
      Only remove the compress flag; all other options must stay as they
      are (same as in restore_data_file_type()).
      NOTE(review): tmp.data_file_type is left as COMPRESSED_RECORD here
      while restore_data_file_type() also changes that member; confirm
      which member _ma_setup_functions() looks at.
    */
    tmp.options&= ~HA_OPTION_COMPRESS_RECORD;
    _ma_setup_functions(&tmp);
    share->delete_record=tmp.delete_record;
  }
}
unknown's avatar
unknown committed
5998 5999 6000

/*
  Change a share to use its original (not compressed) data file type

  NOTES
    Clears HA_OPTION_COMPRESS_RECORD (also in the state header), copies
    org_data_file_type to the data file type in the state header and in
    the share, and sets pack.header_length to 0.
    keypos_to_recpos and recpos_to_keypos are then replaced with the
    functions _ma_setup_functions() chooses for the changed share. No
    other function pointer in the share is changed, as
    _ma_setup_functions() is only run on a copy.
*/

static void restore_data_file_type(MARIA_SHARE *share)
{
  MARIA_SHARE scratch;

  share->pack.header_length= 0;

  share->options&= ~HA_OPTION_COMPRESS_RECORD;
  mi_int2store(share->state.header.options, share->options);

  share->state.header.data_file_type=
    share->state.header.org_data_file_type;
  share->data_file_type= share->state.header.data_file_type;

  /* Use new virtual functions for key generation */
  scratch= *share;
  _ma_setup_functions(&scratch);
  share->keypos_to_recpos= scratch.keypos_to_recpos;
  share->recpos_to_keypos= scratch.recpos_to_keypos;
}
6015 6016


6017 6018
/*
  Close the current data file of a handler and use 'new_file' instead

  NOTES
    The new descriptor is stored both for the bitmap file of the share
    and for the data file of the handler. The bitmap cache is reset
    afterwards.
*/

static void change_data_file_descriptor(MARIA_HA *info, File new_file)
{
  my_close(info->dfile.file, MYF(MY_WME));
  info->s->bitmap.file.file= new_file;
  info->dfile.file= info->s->bitmap.file.file;
  _ma_bitmap_reset_cache(info->s);
}


/**
   @brief Mark the data file to not be used

   @param  info  Maria handler

   @note
   This is used in repair when we want to ensure the handler will not
   write anything to the data file anymore

   @note
   The descriptors of the bitmap file and the data file are set to -1
   without closing anything, and the bitmap cache is reset.
*/

static void unuse_data_file_descriptor(MARIA_HA *info)
{
  info->s->bitmap.file.file= -1;
  info->dfile.file= info->s->bitmap.file.file;
  _ma_bitmap_reset_cache(info->s);
}


/*
  Copy all state that has to do with the data file

  SYNOPSIS
    copy_data_file_state()
    to          State to update
    from        State to copy from

  NOTES
    This is done to copy the state from the data file generated from
    repair to the original handler

    Copied members: state.records, state.del, state.empty,
    state.data_file_length, split, dellink and first_bitmap_with_space.
*/

static void copy_data_file_state(MARIA_STATE_INFO *to,
                                 MARIA_STATE_INFO *from)
{
  /* Members of the embedded status */
  to->state.data_file_length=  from->state.data_file_length;
  to->state.records=           from->state.records;
  to->state.empty=             from->state.empty;
  to->state.del=               from->state.del;

  /* Members of the state itself */
  to->first_bitmap_with_space= from->first_bitmap_with_space;
  to->dellink=                 from->dellink;
  to->split=                   from->split;
}


/*
  Read 'safely' next record while scanning table.

  SYNOPSIS
    _ma_safe_scan_block_record()
    info                Maria handler
    record              Store found here

  NOTES
    - One must have called mi_scan() before this

    Differences compared to  _ma_scan_block_records() are:
    - We read all blocks, not only blocks marked by the bitmap to be safe
    - In case of errors, next read will read next record.
    - More sanity checks

  RETURN
    0   ok
    HA_ERR_END_OF_FILE  End of file
    #   error number
*/


static int _ma_safe_scan_block_record(MARIA_SORT_INFO *sort_info,
6085
                                      MARIA_HA *info, uchar *record)
6086
{
6087
  MARIA_SHARE *share= info->s;
6088 6089
  MARIA_RECORD_POS record_pos= info->cur_row.nextpos;
  pgcache_page_no_t page= sort_info->page;
6090 6091 6092 6093 6094 6095 6096 6097
  DBUG_ENTER("_ma_safe_scan_block_record");

  for (;;)
  {
    /* Find next row in current page */
    if (likely(record_pos < info->scan.number_of_rows))
    {
      uint length, offset;
6098
      uchar *data, *end_of_data;
6099 6100 6101 6102 6103 6104 6105 6106 6107
      char llbuff[22];

      while (!(offset= uint2korr(info->scan.dir)))
      {
        info->scan.dir-= DIR_ENTRY_SIZE;
        record_pos++;
        if (info->scan.dir < info->scan.dir_end)
        {
          _ma_check_print_info(sort_info->param,
unknown's avatar
unknown committed
6108
                               "Wrong directory on page %s",
6109 6110 6111 6112 6113 6114 6115 6116 6117 6118 6119
                               llstr(page, llbuff));
          goto read_next_page;
        }
      }
      /* found row */
      info->cur_row.lastpos= info->scan.row_base_page + record_pos;
      info->cur_row.nextpos= record_pos + 1;
      data= info->scan.page_buff + offset;
      length= uint2korr(info->scan.dir + 2);
      end_of_data= data + length;
      info->scan.dir-= DIR_ENTRY_SIZE;          /* Point to previous row */
6120

6121
      if (end_of_data > info->scan.dir_end ||
6122
          offset < PAGE_HEADER_SIZE || length < share->base.min_block_length)
6123 6124 6125
      {
        _ma_check_print_info(sort_info->param,
                             "Wrong directory entry %3u at page %s",
6126
                             (uint) record_pos, llstr(page, llbuff));
6127 6128 6129 6130 6131 6132 6133 6134 6135 6136 6137 6138 6139 6140 6141 6142 6143 6144 6145
        record_pos++;
        continue;
      }
      else
      {
        DBUG_PRINT("info", ("rowid: %lu", (ulong) info->cur_row.lastpos));
        DBUG_RETURN(_ma_read_block_record2(info, record, data, end_of_data));
      }
    }

read_next_page:
    /* Read until we find next head page */
    for (;;)
    {
      uint page_type;
      char llbuff[22];

      sort_info->page++;                        /* In case of errors */
      page++;
6146
      if (!(page % share->bitmap.pages_covered))
6147
        page++;                                 /* Skip bitmap */
6148
      if ((my_off_t) (page + 1) * share->block_size > sort_info->filelength)
6149
        DBUG_RETURN(HA_ERR_END_OF_FILE);
6150
      if (!(pagecache_read(share->pagecache,
6151 6152 6153 6154
                           &info->dfile,
                           page, 0, info->scan.page_buff,
                           PAGECACHE_READ_UNKNOWN_PAGE,
                           PAGECACHE_LOCK_LEFT_UNLOCKED, 0)))
6155 6156 6157 6158
      {
        if (my_errno == HA_ERR_WRONG_CRC)
        {
          _ma_check_print_info(sort_info->param,
unknown's avatar
unknown committed
6159 6160
                               "Wrong CRC on datapage at %s",
                               llstr(page, llbuff));
6161 6162
          continue;
        }
6163
        DBUG_RETURN(my_errno);
6164
      }
6165 6166 6167 6168 6169 6170 6171 6172
      page_type= (info->scan.page_buff[PAGE_TYPE_OFFSET] &
                  PAGE_TYPE_MASK);
      if (page_type == HEAD_PAGE)
      {
        if ((info->scan.number_of_rows=
             (uint) (uchar) info->scan.page_buff[DIR_COUNT_OFFSET]) != 0)
          break;
        _ma_check_print_info(sort_info->param,
unknown's avatar
unknown committed
6173 6174
                             "Wrong head page at page %s",
                             llstr(page, llbuff));
6175 6176 6177 6178
      }
      else if (page_type >= MAX_PAGE_TYPE)
      {
        _ma_check_print_info(sort_info->param,
unknown's avatar
unknown committed
6179 6180
                             "Found wrong page type: %d at page %s",
                             page_type, llstr(page, llbuff));
6181 6182 6183 6184
      }
    }

    /* New head page */
6185
    info->scan.dir= (info->scan.page_buff + share->block_size -
6186 6187 6188 6189 6190 6191 6192 6193
                     PAGE_SUFFIX_SIZE - DIR_ENTRY_SIZE);
    info->scan.dir_end= (info->scan.dir -
                         (info->scan.number_of_rows - 1) *
                         DIR_ENTRY_SIZE);
    info->scan.row_base_page= ma_recordpos(page, 0);
    record_pos= 0;
  }
}
6194 6195


6196 6197
/**
   @brief Writes a LOGREC_REPAIR_TABLE record and updates create_rename_lsn
unknown's avatar
unknown committed
6198
   if needed (so that maria_read_log does not redo the repair).
6199 6200 6201 6202 6203 6204 6205 6206 6207

   @param  param            description of the REPAIR operation
   @param  info             table

   @return Operation status
     @retval 0      ok
     @retval 1      error (disk problem)
*/

unknown's avatar
unknown committed
6208
my_bool write_log_record_for_repair(const HA_CHECK *param, MARIA_HA *info)
6209
{
unknown's avatar
unknown committed
6210
  MARIA_SHARE *share= info->s;
unknown's avatar
unknown committed
6211
  /* in case this is maria_chk or recovery... */
6212
  if (translog_status == TRANSLOG_OK && !maria_in_recovery &&
6213
      share->base.born_transactional)
6214
  {
6215
    my_bool save_now_transactional= share->now_transactional;
6216

6217 6218 6219 6220 6221 6222 6223 6224 6225 6226 6227 6228 6229 6230 6231 6232 6233
    /*
      For now this record is only informative. It could serve when applying
      logs to a backup, but that needs more thought. Assume table became
      corrupted. It is repaired, then some writes happen to it.
      Later we restore an old backup, and want to apply this REDO_REPAIR_TABLE
      record. For it to give the same result as originally, the table should
      be corrupted the same way, so applying previous REDOs should produce the
      same corruption; that's really not guaranteed (different execution paths
      in execution of REDOs vs runtime code so not same bugs hit, temporary
      hardware issues not repeatable etc). Corruption may not be repeatable.
      A reasonable solution is to execute the REDO_REPAIR_TABLE record and
      check if the checksum of the resulting table matches what it was at the
      end of the original repair (should be stored in log record); or execute
      the REDO_REPAIR_TABLE if the checksum of the table-before-repair matches
      was it was at the start of the original repair (should be stored in log
      record).
    */
6234
    LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 1];
unknown's avatar
unknown committed
6235
    uchar log_data[FILEID_STORE_SIZE + 8 + 8];
unknown's avatar
unknown committed
6236
    LSN lsn;
6237

6238 6239 6240 6241
    /*
      testflag gives an idea of what REPAIR did (in particular T_QUICK
      or not: did it touch the data file or not?).
    */
unknown's avatar
unknown committed
6242
    int8store(log_data + FILEID_STORE_SIZE, param->testflag);
6243
    /* org_key_map is used when recreating index after a load data infile */
unknown's avatar
unknown committed
6244
    int8store(log_data + FILEID_STORE_SIZE + 8, param->org_key_map);
6245 6246 6247 6248

    log_array[TRANSLOG_INTERNAL_PARTS + 0].str=    (char*) log_data;
    log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data);

6249
    share->now_transactional= 1;
unknown's avatar
unknown committed
6250
    if (unlikely(translog_write_record(&lsn, LOGREC_REDO_REPAIR_TABLE,
unknown's avatar
unknown committed
6251
                                       &dummy_transaction_object, info,
6252
                                       (translog_size_t) sizeof(log_data),
6253
                                       sizeof(log_array)/sizeof(log_array[0]),
unknown's avatar
unknown committed
6254
                                       log_array, log_data, NULL) ||
unknown's avatar
unknown committed
6255
                 translog_flush(lsn)))
unknown's avatar
unknown committed
6256
      return TRUE;
6257
    /*
unknown's avatar
unknown committed
6258
      The table's existence was made durable earlier (MY_SYNC_DIR passed to
unknown's avatar
unknown committed
6259 6260 6261 6262 6263 6264 6265 6266 6267 6268
      maria_change_to_newfile()). All pages have been flushed, state too, we
      need to force it to disk. Old REDOs should not be applied to the table,
      which is already enforced as skip_redos_lsn was increased in
      protect_against_repair_crash(). But if this is an explicit repair,
      even UNDO phase should ignore this table: create_rename_lsn should be
      increased, and this also serves for the REDO_REPAIR to be ignored by
      maria_read_log.
      The fully correct order would be: sync data and index file, remove crash
      mark and update LSNs then write state and sync index file. But at this
      point state (without crash mark) is already written.
6269
    */
unknown's avatar
unknown committed
6270 6271 6272 6273
    if ((!(param->testflag & T_NO_CREATE_RENAME_LSN) &&
         _ma_update_state_lsns(share, lsn, FALSE, FALSE)) ||
        _ma_sync_table_files(info))
      return TRUE;
6274
    share->now_transactional= save_now_transactional;
6275
  }
unknown's avatar
unknown committed
6276 6277 6278 6279
  return FALSE;
}


6280 6281 6282 6283 6284 6285
/**
  Writes an UNDO record which if executed in UNDO phase, will empty the
  table. Such record is thus logged only in certain cases of bulk insert
  (table needs to be empty etc).

  The record body starts with the transaction's current undo_lsn; the log
  is flushed up to the new record before returning.

  @param  info             table; info->trn is the logging transaction

  @return Operation status
    @retval 0      record written and log flushed
    @retval 1      translog_write_record() or translog_flush() reported
                   a failure
*/
my_bool write_log_record_for_bulk_insert(MARIA_HA *info)
{
  uchar log_data[LSN_STORE_SIZE + FILEID_STORE_SIZE];
  LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 1];
  LSN lsn;

  lsn_store(log_data, info->trn->undo_lsn);
  log_array[TRANSLOG_INTERNAL_PARTS].length= sizeof(log_data);
  log_array[TRANSLOG_INTERNAL_PARTS].str= (char*) log_data;

  if (translog_write_record(&lsn, LOGREC_UNDO_BULK_INSERT,
                            info->trn, info,
                            (translog_size_t) sizeof(log_data),
                            TRANSLOG_INTERNAL_PARTS + 1, log_array,
                            log_data + LSN_STORE_SIZE, NULL))
    return TRUE;

  /* WAL: the record must be on disk before we go on */
  if (translog_flush(lsn))
    return TRUE;
  return FALSE;
}
6302 6303 6304 6305


/* Give error message why reading of key page failed */

unknown's avatar
unknown committed
6306 6307
static void report_keypage_fault(HA_CHECK *param, MARIA_HA *info,
                                 my_off_t position)
6308 6309
{
  char buff[11];
unknown's avatar
unknown committed
6310
  uint32 block_size= info->s->block_size;
6311 6312 6313

  if (my_errno == HA_ERR_CRASHED)
    _ma_check_print_error(param,
unknown's avatar
unknown committed
6314 6315
                          "Wrong base information on indexpage at page: %s",
                          llstr(position / block_size, buff));
6316 6317
  else
    _ma_check_print_error(param,
unknown's avatar
unknown committed
6318
                          "Can't read indexpage from page: %s, "
6319
                          "error: %d",
unknown's avatar
unknown committed
6320
                          llstr(position / block_size, buff), my_errno);
6321
}