ma_check.c 222 KB
Newer Older
1 2 3 4
/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
5
   the Free Software Foundation; version 2 of the License.
6 7 8 9 10 11 12 13 14 15 16 17

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */

/* Describe, check and repair of MARIA tables */

/*
  About checksum calculation.

  There are two types of checksums: table checksum and row checksum.

  Row checksum is an additional uchar at the end of dynamic length
  records. It must be calculated if the table is configured for them.
  Otherwise they must not be used. The variable
  MYISAM_SHARE::calc_checksum determines if row checksums are used.
  MI_INFO::checksum is used as temporary storage during row handling.
  For parallel repair we must ensure that only one thread can use this
  variable. There is no problem on the write side as this is done by one
  thread only. But when checking a record after read this could go
  wrong. But since all threads read through a common read buffer, it is
  sufficient if only one thread checks it.

  Table checksum is an eight uchar value in the header of the index file.
  It can be calculated even if row checksums are not used. The variable
  MI_CHECK::glob_crc is calculated over all records.
  MI_SORT_PARAM::calc_checksum determines if this should be done. This
  variable is not part of MI_CHECK because it must be set per thread for
  parallel repair. The global glob_crc must be changed by one thread
  only. And it is sufficient to calculate the checksum only once.
*/

43
#include "ma_ftdefs.h"
44 45 46 47 48
#include "ma_rt_index.h"
#include "ma_blockrec.h"
#include "trnman.h"
#include "ma_key_recover.h"

49 50 51 52 53 54 55 56 57
#include <stdarg.h>
#include <my_getopt.h>
#ifdef HAVE_SYS_VADVISE_H
#include <sys/vadvise.h>
#endif
#ifdef HAVE_SYS_MMAN_H
#include <sys/mman.h>
#endif

unknown's avatar
unknown committed
58
/* Functions defined in this file */
59

unknown's avatar
unknown committed
60
static int check_k_link(HA_CHECK *param, MARIA_HA *info, my_off_t next_link);
61
static int chk_index(HA_CHECK *param, MARIA_HA *info,MARIA_KEYDEF *keyinfo,
unknown's avatar
unknown committed
62
		     my_off_t page, uchar *buff, ha_rows *keys,
63 64 65
		     ha_checksum *key_checksum, uint level);
static uint isam_key_length(MARIA_HA *info,MARIA_KEYDEF *keyinfo);
static ha_checksum calc_checksum(ha_rows count);
66
static int writekeys(MARIA_SORT_PARAM *sort_param);
unknown's avatar
unknown committed
67 68
static int sort_one_index(HA_CHECK *param, MARIA_HA *info,
                          MARIA_KEYDEF *keyinfo,
69
			  my_off_t pagepos, File new_file);
unknown's avatar
unknown committed
70 71
static int sort_key_read(MARIA_SORT_PARAM *sort_param, uchar *key);
static int sort_maria_ft_key_read(MARIA_SORT_PARAM *sort_param, uchar *key);
72
static int sort_get_next_record(MARIA_SORT_PARAM *sort_param);
unknown's avatar
unknown committed
73 74 75
static int sort_key_cmp(MARIA_SORT_PARAM *sort_param, const void *a,
                        const void *b);
static int sort_maria_ft_key_write(MARIA_SORT_PARAM *sort_param,
unknown's avatar
unknown committed
76 77
                                   const uchar *a);
static int sort_key_write(MARIA_SORT_PARAM *sort_param, const uchar *a);
78
static my_off_t get_record_for_key(MARIA_KEYDEF *keyinfo, const uchar *key);
79 80
static int sort_insert_key(MARIA_SORT_PARAM  *sort_param,
                           reg1 SORT_KEY_BLOCKS *key_block,
unknown's avatar
unknown committed
81
			   const uchar *key, my_off_t prev_block);
82 83 84 85
static int sort_delete_record(MARIA_SORT_PARAM *sort_param);
/*static int _ma_flush_pending_blocks(HA_CHECK *param);*/
static SORT_KEY_BLOCKS	*alloc_key_blocks(HA_CHECK *param, uint blocks,
					  uint buffer_length);
unknown's avatar
unknown committed
86
static ha_checksum maria_byte_checksum(const uchar *buf, uint length);
87
static void set_data_file_type(MARIA_SORT_INFO *sort_info, MARIA_SHARE *share);
unknown's avatar
unknown committed
88
static void restore_data_file_type(MARIA_SHARE *share);
89
static void change_data_file_descriptor(MARIA_HA *info, File new_file);
90
static void unuse_data_file_descriptor(MARIA_HA *info);
91
static int _ma_safe_scan_block_record(MARIA_SORT_INFO *sort_info,
92
                                      MARIA_HA *info, uchar *record);
93 94
static void copy_data_file_state(MARIA_STATE_INFO *to,
                                 MARIA_STATE_INFO *from);
unknown's avatar
unknown committed
95 96
static void report_keypage_fault(HA_CHECK *param, MARIA_HA *info,
                                 my_off_t position);
unknown's avatar
unknown committed
97
static my_bool create_new_data_handle(MARIA_SORT_PARAM *param, File new_file);
unknown's avatar
unknown committed
98 99
static my_bool _ma_flush_table_files_before_swap(HA_CHECK *param,
                                                 MARIA_HA *info);
100
static TrID max_trid_in_system(void);
unknown's avatar
unknown committed
101
static void _ma_check_print_not_visible_error(HA_CHECK *param, TrID used_trid);
102
void retry_if_quick(MARIA_SORT_PARAM *param, int error);
103

104

unknown's avatar
unknown committed
105 106
/* Initialize check param with default values */

unknown's avatar
unknown committed
107
void maria_chk_init(HA_CHECK *param)
108
{
unknown's avatar
unknown committed
109
  bzero((uchar*) param,sizeof(*param));
110 111 112 113 114 115 116 117 118 119 120 121 122
  param->opt_follow_links=1;
  param->keys_in_use= ~(ulonglong) 0;
  param->search_after_block=HA_OFFSET_ERROR;
  param->auto_increment_value= 0;
  param->use_buffers=USE_BUFFER_INIT;
  param->read_buffer_length=READ_BUFFER_INIT;
  param->write_buffer_length=READ_BUFFER_INIT;
  param->sort_buffer_length=SORT_BUFFER_INIT;
  param->sort_key_blocks=BUFFERS_WHEN_SORTING;
  param->tmpfile_createflag=O_RDWR | O_TRUNC | O_EXCL;
  param->myf_rw=MYF(MY_NABP | MY_WME | MY_WAIT_IF_FULL);
  param->start_check_pos=0;
  param->max_record_length= LONGLONG_MAX;
unknown's avatar
unknown committed
123
  param->pagecache_block_size= KEY_CACHE_BLOCK_SIZE;
124 125 126
  param->stats_method= MI_STATS_METHOD_NULLS_NOT_EQUAL;
}

unknown's avatar
unknown committed
127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147

/*
  Prepare the check descriptor and the table handler for checking a table

  SYNOPSIS
    maria_chk_init_for_check()
      param   Check descriptor; the transaction id bookkeeping is reset
      info    Table handler; passed to maria_ignore_trids()
*/

void maria_chk_init_for_check(HA_CHECK *param, MARIA_HA *info)
{
  param->not_visible_rows_found= 0;
  param->max_found_trid= 0;

  /*
    Set up the transaction handling so that we can see all rows. When rows
    are read, the transaction id that is found is checked against
    param->max_trid. Without an initialized control file max_trid is 0, so
    that a warning is given for the first trid found.
  */
  param->max_trid= (ma_control_file_inited() ? max_trid_in_system() : 0);

  maria_ignore_trids(info);
}


148 149
	/* Check the status flags for the table */

unknown's avatar
unknown committed
150
int maria_chk_status(HA_CHECK *param, MARIA_HA *info)
151
{
152
  MARIA_SHARE *share= info->s;
153 154 155 156 157 158 159

  if (maria_is_crashed_on_repair(info))
    _ma_check_print_warning(param,
			   "Table is marked as crashed and last repair failed");
  else if (maria_is_crashed(info))
    _ma_check_print_warning(param,
			   "Table is marked as crashed");
160
  if (share->state.open_count != (uint) (share->global_changed ? 1 : 0))
161 162 163 164 165 166 167 168 169 170 171 172 173 174 175
  {
    /* Don't count this as a real warning, as check can correct this ! */
    uint save=param->warning_printed;
    _ma_check_print_warning(param,
			   share->state.open_count==1 ?
			   "%d client is using or hasn't closed the table properly" :
			   "%d clients are using or haven't closed the table properly",
			   share->state.open_count);
    /* If this will be fixed by the check, forget the warning */
    if (param->testflag & T_UPDATE_STATE)
      param->warning_printed=save;
  }
  return 0;
}

unknown's avatar
unknown committed
176 177 178
/*
  Check delete links in row data
*/
179

180 181
int maria_chk_del(HA_CHECK *param, register MARIA_HA *info,
                  ulonglong test_flag)
182
{
183
  MARIA_SHARE *share= info->s;
184 185 186 187 188 189 190
  reg2 ha_rows i;
  uint delete_link_length;
  my_off_t empty,next_link,old_link;
  char buff[22],buff2[22];
  DBUG_ENTER("maria_chk_del");

  LINT_INIT(old_link);
unknown's avatar
unknown committed
191

192 193
  param->record_checksum=0;

194
  if (share->data_file_type == BLOCK_RECORD)
unknown's avatar
unknown committed
195 196
    DBUG_RETURN(0);                             /* No delete links here */

197 198
  delete_link_length=((share->options & HA_OPTION_PACK_RECORD) ? 20 :
		      share->rec_reflength+1);
199 200 201 202

  if (!(test_flag & T_SILENT))
    puts("- check record delete-chain");

203
  next_link=share->state.dellink;
204
  if (share->state.state.del == 0)
205 206 207 208 209 210 211 212 213 214 215
  {
    if (test_flag & T_VERBOSE)
    {
      puts("No recordlinks");
    }
  }
  else
  {
    if (test_flag & T_VERBOSE)
      printf("Recordlinks:    ");
    empty=0;
216
    for (i= share->state.state.del ; i > 0L && next_link != HA_OFFSET_ERROR ; i--)
217 218 219 220 221
    {
      if (*_ma_killed_ptr(param))
        DBUG_RETURN(1);
      if (test_flag & T_VERBOSE)
	printf(" %9s",llstr(next_link,buff));
222
      if (next_link >= share->state.state.data_file_length)
223
	goto wrong;
224
      if (my_pread(info->dfile.file, (uchar*) buff, delete_link_length,
225 226 227 228 229 230 231 232 233 234 235 236 237 238
		   next_link,MYF(MY_NABP)))
      {
	if (test_flag & T_VERBOSE) puts("");
	_ma_check_print_error(param,"Can't read delete-link at filepos: %s",
		    llstr(next_link,buff));
	DBUG_RETURN(1);
      }
      if (*buff != '\0')
      {
	if (test_flag & T_VERBOSE) puts("");
	_ma_check_print_error(param,"Record at pos: %s is not remove-marked",
		    llstr(next_link,buff));
	goto wrong;
      }
239
      if (share->options & HA_OPTION_PACK_RECORD)
240 241 242 243 244 245 246 247 248 249 250 251 252 253 254
      {
	my_off_t prev_link=mi_sizekorr(buff+12);
	if (empty && prev_link != old_link)
	{
	  if (test_flag & T_VERBOSE) puts("");
	  _ma_check_print_error(param,"Deleted block at %s doesn't point back at previous delete link",llstr(next_link,buff2));
	  goto wrong;
	}
	old_link=next_link;
	next_link=mi_sizekorr(buff+4);
	empty+=mi_uint3korr(buff+1);
      }
      else
      {
	param->record_checksum+=(ha_checksum) next_link;
255
	next_link= _ma_rec_pos(share, (uchar *) buff + 1);
256
	empty+=share->base.pack_reclength;
257 258
      }
    }
259
    if (share->state.state.del && (test_flag & T_VERBOSE))
260
      puts("\n");
261
    if (empty != share->state.state.empty)
262 263 264 265
    {
      _ma_check_print_warning(param,
			     "Found %s deleted space in delete link chain. Should be %s",
			     llstr(empty,buff2),
266
			     llstr(share->state.state.empty,buff));
267 268 269 270 271
    }
    if (next_link != HA_OFFSET_ERROR)
    {
      _ma_check_print_error(param,
			   "Found more than the expected %s deleted rows in delete link chain",
272
			   llstr(share->state.state.del, buff));
273 274 275 276 277 278
      goto wrong;
    }
    if (i != 0)
    {
      _ma_check_print_error(param,
			   "Found %s deleted rows in delete link chain. Should be %s",
279 280
			   llstr(share->state.state.del - i, buff2),
			   llstr(share->state.state.del, buff));
281 282 283 284 285 286 287
      goto wrong;
    }
  }
  DBUG_RETURN(0);

wrong:
  param->testflag|=T_RETRY_WITHOUT_QUICK;
288 289
  if (test_flag & T_VERBOSE)
    puts("");
290 291 292 293 294
  _ma_check_print_error(param,"record delete-link-chain corrupted");
  DBUG_RETURN(1);
} /* maria_chk_del */


unknown's avatar
unknown committed
295
/* Check delete links in index file */
296

297
static int check_k_link(HA_CHECK *param, register MARIA_HA *info,
unknown's avatar
unknown committed
298
                        my_off_t next_link)
299
{
300 301
  MARIA_SHARE *share= info->s;
  uint block_size= share->block_size;
302
  ha_rows records;
303 304
  char llbuff[21], llbuff2[21];
  uchar *buff;
305 306
  DBUG_ENTER("check_k_link");

307 308 309
  if (next_link == HA_OFFSET_ERROR)
    DBUG_RETURN(0);                             /* Avoid printing empty line */

310
  records= (ha_rows) (share->state.state.key_file_length / block_size);
311 312 313 314 315 316
  while (next_link != HA_OFFSET_ERROR && records > 0)
  {
    if (*_ma_killed_ptr(param))
      DBUG_RETURN(1);
    if (param->testflag & T_VERBOSE)
      printf("%16s",llstr(next_link,llbuff));
317 318

    /* Key blocks must lay within the key file length entirely. */
319
    if (next_link + block_size > share->state.state.key_file_length)
320 321 322 323 324
    {
      /* purecov: begin tested */
      _ma_check_print_error(param, "Invalid key block position: %s  "
                            "key block size: %u  file_length: %s",
                            llstr(next_link, llbuff), block_size,
325
                            llstr(share->state.state.key_file_length, llbuff2));
326 327 328
      DBUG_RETURN(1);
      /* purecov: end */
    }
329

unknown's avatar
unknown committed
330 331
    /* Key blocks must be aligned at block_size */
    if (next_link & (block_size -1))
332 333 334 335
    {
      /* purecov: begin tested */
      _ma_check_print_error(param, "Mis-aligned key block: %s  "
                            "minimum key block length: %u",
unknown's avatar
unknown committed
336 337
                            llstr(next_link, llbuff),
                            block_size);
338
      DBUG_RETURN(1);
339 340
      /* purecov: end */
    }
unknown's avatar
unknown committed
341

342 343
    DBUG_ASSERT(share->pagecache->block_size == block_size);
    if (!(buff= pagecache_read(share->pagecache,
344 345
                               &share->kfile,
                               (pgcache_page_no_t) (next_link / block_size),
unknown's avatar
unknown committed
346
                               DFLT_INIT_HITS,
unknown's avatar
unknown committed
347
                               (uchar*) info->buff,
348
                               PAGECACHE_READ_UNKNOWN_PAGE,
unknown's avatar
unknown committed
349
                               PAGECACHE_LOCK_LEFT_UNLOCKED, 0)))
350 351 352 353
    {
      /* purecov: begin tested */
      _ma_check_print_error(param, "key cache read error for block: %s",
                            llstr(next_link,llbuff));
354
      DBUG_RETURN(1);
355 356
      /* purecov: end */
    }
357
    if (_ma_get_keynr(info->s, buff) != MARIA_DELETE_KEY_NR)
358 359 360
      _ma_check_print_error(param, "Page at %s is not delete marked",
                            llstr(next_link, llbuff));

361
    next_link= mi_sizekorr(buff + share->keypage_header);
362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379
    records--;
    param->key_file_blocks+=block_size;
  }
  if (param->testflag & T_VERBOSE)
  {
    if (next_link != HA_OFFSET_ERROR)
      printf("%16s\n",llstr(next_link,llbuff));
    else
      puts("");
  }
  DBUG_RETURN (next_link != HA_OFFSET_ERROR);
} /* check_k_link */


	/* Check sizes of files */

/*
  SYNOPSIS
    maria_chk_size()
      param   Check descriptor (test flags, warning/error reporting)
      info    Table handler

  DESCRIPTION
    Flushes data and index file and compares their physical sizes with the
    lengths stored in the table state. Also warns when the key file or data
    file is above 90% of its limit.

    If the data file is shorter than recorded, state.data_file_length is
    set to the real size and T_RETRY_WITHOUT_QUICK is set in
    param->testflag.

  RETURN
    0     ok (possibly with warnings)
    != 0  flush failed or a file is shorter than the recorded length
*/

int maria_chk_size(HA_CHECK *param, register MARIA_HA *info)
{
  MARIA_SHARE *share= info->s;
  int error;
  register my_off_t skr,size;
  char buff[22],buff2[22];
  DBUG_ENTER("maria_chk_size");

  if (!(param->testflag & T_SILENT))
    puts("- check file-size");

  /*
    The following is needed if called externally (not from maria_chk).
    To get a correct physical size we need to flush them.
  */
  if ((error= _ma_flush_table_files(info,
                                    MARIA_FLUSH_DATA | MARIA_FLUSH_INDEX,
                                    FLUSH_FORCE_WRITE, FLUSH_FORCE_WRITE)))
    _ma_check_print_error(param, "Failed to flush data or index file");

  /* Index file: compare physical size with the recorded length */
  size= my_seek(share->kfile.file, 0L, MY_SEEK_END, MYF(MY_THREADSAFE));
  if ((skr=(my_off_t) share->state.state.key_file_length) != size)
  {
    /* Don't give error if file generated by mariapack */
    if (skr > size && maria_is_any_key_active(share->state.key_map))
    {
      error=1;
      _ma_check_print_error(param,
			   "Size of indexfile is: %-8s        Should be: %s",
			   llstr(size,buff), llstr(skr,buff2));
    }
    else if (!(param->testflag & T_VERY_SILENT))
      _ma_check_print_warning(param,
			     "Size of indexfile is: %-8s      Should be: %s",
			     llstr(size,buff), llstr(skr,buff2));
  }
  if (!(param->testflag & T_VERY_SILENT) &&
      ! (share->options & HA_OPTION_COMPRESS_RECORD) &&
      ulonglong2double(share->state.state.key_file_length) >
      ulonglong2double(share->base.margin_key_file_length)*0.9)
    /*
      Each llstr() call needs its own buffer: both results are only read
      when the message is formatted, so sharing one buffer would print the
      same number twice.
    */
    _ma_check_print_warning(param,"Keyfile is almost full, %10s of %10s used",
			   llstr(share->state.state.key_file_length,buff),
			   llstr(share->base.max_key_file_length-1,buff2));

  /* Data file: compare physical size with the recorded length */
  size= my_seek(info->dfile.file, 0L, MY_SEEK_END, MYF(0));
  skr=(my_off_t) share->state.state.data_file_length;
  if (share->options & HA_OPTION_COMPRESS_RECORD)
    skr+= MEMMAP_EXTRA_MARGIN;
#ifdef USE_RELOC
  if (share->data_file_type == STATIC_RECORD &&
      skr < (my_off_t) share->base.reloc*share->base.min_pack_length)
    skr=(my_off_t) share->base.reloc*share->base.min_pack_length;
#endif
  if (skr != size)
  {
    if (skr > size && skr != size + MEMMAP_EXTRA_MARGIN)
    {
      share->state.state.data_file_length=size;	/* Skip other errors */
      error=1;
      _ma_check_print_error(param,"Size of datafile is: %-9s         Should be: %s",
		    llstr(size,buff), llstr(skr,buff2));
      param->testflag|=T_RETRY_WITHOUT_QUICK;
    }
    else
    {
      _ma_check_print_warning(param,
			     "Size of datafile is: %-9s       Should be: %s",
			     llstr(size,buff), llstr(skr,buff2));
    }
  }
  if (!(param->testflag & T_VERY_SILENT) &&
      !(share->options & HA_OPTION_COMPRESS_RECORD) &&
      ulonglong2double(share->state.state.data_file_length) >
      (ulonglong2double(share->base.max_data_file_length)*0.9))
    _ma_check_print_warning(param, "Datafile is almost full, %10s of %10s used",
			   llstr(share->state.state.data_file_length,buff),
			   llstr(share->base.max_data_file_length-1,buff2));
  DBUG_RETURN(error);
} /* maria_chk_size */


unknown's avatar
unknown committed
459
/* Check keys */
460 461 462 463 464 465 466

int maria_chk_key(HA_CHECK *param, register MARIA_HA *info)
{
  uint key,found_keys=0,full_text_keys=0,result=0;
  ha_rows keys;
  ha_checksum old_record_checksum,init_checksum;
  my_off_t all_keydata,all_totaldata,key_totlength,length;
467
  double  *rec_per_key_part;
468
  MARIA_SHARE *share= info->s;
469 470 471 472 473 474 475
  MARIA_KEYDEF *keyinfo;
  char buff[22],buff2[22];
  DBUG_ENTER("maria_chk_key");

  if (!(param->testflag & T_SILENT))
    puts("- check key delete-chain");

476 477
  param->key_file_blocks=share->base.keystart;
  if (check_k_link(param, info, share->state.key_del))
unknown's avatar
unknown committed
478 479 480 481 482
  {
    if (param->testflag & T_VERBOSE) puts("");
    _ma_check_print_error(param,"key delete-link-chain corrupted");
    DBUG_RETURN(-1);
  }
483

484 485
  if (!(param->testflag & T_SILENT))
    puts("- check index reference");
486 487 488

  all_keydata=all_totaldata=key_totlength=0;
  init_checksum=param->record_checksum;
489
  old_record_checksum=0;
unknown's avatar
unknown committed
490
  if (share->data_file_type == STATIC_RECORD)
491 492
    old_record_checksum= (calc_checksum(share->state.state.records +
                                        share->state.state.del-1) *
unknown's avatar
unknown committed
493
                          share->base.pack_reclength);
494
  rec_per_key_part= param->new_rec_per_key_part;
495 496 497 498 499 500 501 502 503
  for (key= 0,keyinfo= &share->keyinfo[0]; key < share->base.keys ;
       rec_per_key_part+=keyinfo->keysegs, key++, keyinfo++)
  {
    param->key_crc[key]=0;
    if (! maria_is_key_active(share->state.key_map, key))
    {
      /* Remember old statistics for key */
      memcpy((char*) rec_per_key_part,
	     (char*) (share->state.rec_per_key_part +
504
		      (uint) (rec_per_key_part - param->new_rec_per_key_part)),
505 506 507 508 509 510 511 512 513 514 515 516
	     keyinfo->keysegs*sizeof(*rec_per_key_part));
      continue;
    }
    found_keys++;

    param->record_checksum=init_checksum;

    bzero((char*) &param->unique_count,sizeof(param->unique_count));
    bzero((char*) &param->notnull_count,sizeof(param->notnull_count));

    if ((!(param->testflag & T_SILENT)))
      printf ("- check data record references index: %d\n",key+1);
517
    if (keyinfo->flag & (HA_FULLTEXT | HA_SPATIAL))
518
      full_text_keys++;
519 520
    if (share->state.key_root[key] == HA_OFFSET_ERROR)
    {
521
      if (share->state.state.records != 0 && !(keyinfo->flag & HA_FULLTEXT))
522
        _ma_check_print_error(param, "Key tree %u is empty", key + 1);
523
      goto do_stat;
524 525 526 527
    }
    if (!_ma_fetch_keypage(info, keyinfo, share->state.key_root[key],
                           PAGECACHE_LOCK_LEFT_UNLOCKED, DFLT_INIT_HITS,
                           info->buff, 0, 0))
528
    {
unknown's avatar
unknown committed
529
      report_keypage_fault(param, info, share->state.key_root[key]);
530 531 532 533 534 535 536 537 538 539 540 541 542
      if (!(param->testflag & T_INFO))
	DBUG_RETURN(-1);
      result= -1;
      continue;
    }
    param->key_file_blocks+=keyinfo->block_length;
    keys=0;
    param->keydata=param->totaldata=0;
    param->key_blocks=0;
    param->max_level=0;
    if (chk_index(param,info,keyinfo,share->state.key_root[key],info->buff,
		  &keys, param->key_crc+key,1))
      DBUG_RETURN(-1);
543
    if (!(keyinfo->flag & (HA_FULLTEXT | HA_SPATIAL | HA_RTREE_INDEX)))
544
    {
545
      if (keys != share->state.state.records)
546 547
      {
	_ma_check_print_error(param,"Found %s keys of %s",llstr(keys,buff),
548
		    llstr(share->state.state.records,buff2));
549 550 551 552 553
	if (!(param->testflag & T_INFO))
	DBUG_RETURN(-1);
	result= -1;
	continue;
      }
unknown's avatar
unknown committed
554 555 556 557
      if ((found_keys - full_text_keys == 1 &&
           !(share->data_file_type == STATIC_RECORD)) ||
          (param->testflag & T_DONT_CHECK_CHECKSUM))
	old_record_checksum= param->record_checksum;
558 559 560
      else if (old_record_checksum != param->record_checksum)
      {
	if (key)
561 562 563
	  _ma_check_print_error(param,
                                "Key %u doesn't point at same records as "
                                "key 1",
564 565 566 567 568 569 570 571 572 573 574 575
		      key+1);
	else
	  _ma_check_print_error(param,"Key 1 doesn't point at all records");
	if (!(param->testflag & T_INFO))
	  DBUG_RETURN(-1);
	result= -1;
	continue;
      }
    }
    if ((uint) share->base.auto_key -1 == key)
    {
      /* Check that auto_increment key is bigger than max key value */
576
      ulonglong auto_increment;
577
      const HA_KEYSEG *keyseg= share->keyinfo[share->base.auto_key-1].seg;
578
      info->lastinx=key;
unknown's avatar
unknown committed
579
      _ma_read_key_record(info, info->rec_buff, 0);
580 581 582
      auto_increment=
        ma_retrieve_auto_increment(info->rec_buff + keyseg->start,
                                   keyseg->type);
583
      if (auto_increment > share->state.auto_increment)
584
      {
585 586
	_ma_check_print_warning(param, "Auto-increment value: %s is smaller "
                                "than max used value: %s",
587
                                llstr(share->state.auto_increment,buff2),
588
                                llstr(auto_increment, buff));
589 590 591
      }
      if (param->testflag & T_AUTO_INC)
      {
592
        set_if_bigger(share->state.auto_increment,
593
                      auto_increment);
594
        set_if_bigger(share->state.auto_increment,
595
                      param->auto_increment_value);
596 597 598 599
      }

      /* Check that there isn't a row with auto_increment = 0 in the table */
      maria_extra(info,HA_EXTRA_KEYREAD,0);
600 601 602 603
      bzero(info->lastkey_buff, keyinfo->seg->length);
      if (!maria_rkey(info, info->rec_buff, key,
                      info->lastkey_buff,
                      (key_part_map) 1, HA_READ_KEY_EXACT))
604
      {
unknown's avatar
unknown committed
605
	/* Don't count this as a real warning, as maria_chk can't correct it */
606
	uint save=param->warning_printed;
607 608
	_ma_check_print_warning(param, "Found row where the auto_increment "
                                "column has the value 0");
609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628
	param->warning_printed=save;
      }
      maria_extra(info,HA_EXTRA_NO_KEYREAD,0);
    }

    length=(my_off_t) isam_key_length(info,keyinfo)*keys + param->key_blocks*2;
    if (param->testflag & T_INFO && param->totaldata != 0L && keys != 0L)
      printf("Key: %2d:  Keyblocks used: %3d%%  Packed: %4d%%  Max levels: %2d\n",
	     key+1,
	     (int) (my_off_t2double(param->keydata)*100.0/my_off_t2double(param->totaldata)),
	     (int) ((my_off_t2double(length) - my_off_t2double(param->keydata))*100.0/
		    my_off_t2double(length)),
	     param->max_level);
    all_keydata+=param->keydata; all_totaldata+=param->totaldata; key_totlength+=length;

do_stat:
    if (param->testflag & T_STATISTICS)
      maria_update_key_parts(keyinfo, rec_per_key_part, param->unique_count,
                       param->stats_method == MI_STATS_METHOD_IGNORE_NULLS?
                       param->notnull_count: NULL,
629
                       (ulonglong)share->state.state.records);
630 631 632 633 634 635 636 637 638 639 640 641 642
  }
  if (param->testflag & T_INFO)
  {
    if (all_totaldata != 0L && found_keys > 0)
      printf("Total:    Keyblocks used: %3d%%  Packed: %4d%%\n\n",
	     (int) (my_off_t2double(all_keydata)*100.0/
		    my_off_t2double(all_totaldata)),
	     (int) ((my_off_t2double(key_totlength) -
		     my_off_t2double(all_keydata))*100.0/
		     my_off_t2double(key_totlength)));
    else if (all_totaldata != 0L && maria_is_any_key_active(share->state.key_map))
      puts("");
  }
643
  if (param->key_file_blocks != share->state.state.key_file_length &&
644
      share->state.key_map == ~(ulonglong) 0)
645 646 647 648 649 650 651 652 653
    _ma_check_print_warning(param, "Some data are unreferenced in keyfile");
  if (found_keys != full_text_keys)
    param->record_checksum=old_record_checksum-init_checksum;	/* Remove delete links */
  else
    param->record_checksum=0;
  DBUG_RETURN(result);
} /* maria_chk_key */


654

unknown's avatar
unknown committed
655 656
static int chk_index_down(HA_CHECK *param, MARIA_HA *info,
                          MARIA_KEYDEF *keyinfo,
unknown's avatar
unknown committed
657
                          my_off_t page, uchar *buff, ha_rows *keys,
unknown's avatar
unknown committed
658
                          ha_checksum *key_checksum, uint level)
659 660
{
  char llbuff[22],llbuff2[22];
661
  MARIA_SHARE *share= info->s;
662
  DBUG_ENTER("chk_index_down");
663

664
  /* Key blocks must lay within the key file length entirely. */
665
  if (page + keyinfo->block_length > share->state.state.key_file_length)
666 667 668
  {
    /* purecov: begin tested */
    /* Give it a chance to fit in the real file size. */
669 670
    my_off_t max_length= my_seek(info->s->kfile.file, 0L, MY_SEEK_END,
                                 MYF(MY_THREADSAFE));
671 672 673
    _ma_check_print_error(param, "Invalid key block position: %s  "
                          "key block size: %u  file_length: %s",
                          llstr(page, llbuff), keyinfo->block_length,
674
                          llstr(share->state.state.key_file_length, llbuff2));
675
    if (page + keyinfo->block_length > max_length)
676
      goto err;
unknown's avatar
unknown committed
677
    /* Fix the remembered key file length. */
678 679 680
    share->state.state.key_file_length= (max_length &
                                          ~ (my_off_t) (keyinfo->block_length -
                                                        1));
681
    /* purecov: end */
682
  }
683

unknown's avatar
unknown committed
684 685
  /* Key blocks must be aligned at block length */
  if (page & (info->s->block_size -1))
686 687 688
  {
    /* purecov: begin tested */
    _ma_check_print_error(param, "Mis-aligned key block: %s  "
689
                          "key block length: %u",
unknown's avatar
unknown committed
690
                          llstr(page, llbuff), info->s->block_size);
691 692 693 694
    goto err;
    /* purecov: end */
  }

695 696
  if (!_ma_fetch_keypage(info, keyinfo, page, PAGECACHE_LOCK_LEFT_UNLOCKED,
                         DFLT_INIT_HITS, buff, 0, 0))
697
  {
unknown's avatar
unknown committed
698
    report_keypage_fault(param, info, page);
699 700 701 702 703 704
    goto err;
  }
  param->key_file_blocks+=keyinfo->block_length;
  if (chk_index(param,info,keyinfo,page,buff,keys,key_checksum,level))
    goto err;

705 706 707
  DBUG_RETURN(0);

  /* purecov: begin tested */
708
err:
709 710
  DBUG_RETURN(1);
  /* purecov: end */
711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730
}


/*
  "Ignore NULLs" statistics collection method: process first index tuple.

  SYNOPSIS
    maria_collect_stats_nonulls_first()
      keyseg   IN     Array of key part descriptions
      notnull  INOUT  Array, notnull[i] = (number of {keypart1...keypart_i}
                                           tuples that don't contain NULLs)
      key      IN     Key values tuple

  DESCRIPTION
    Process the first index tuple - find out which prefix tuples don't
    contain NULLs, and update the array of notnull counters accordingly.
*/

static
void maria_collect_stats_nonulls_first(HA_KEYSEG *keyseg, ulonglong *notnull,
unknown's avatar
unknown committed
731
                                       const uchar *key)
732 733
{
  uint first_null, kp;
734
  first_null= ha_find_null(keyseg, key) - keyseg;
735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770
  /*
    All prefix tuples that don't include keypart_{first_null} are not-null
    tuples (and all others aren't), increment counters for them.
  */
  for (kp= 0; kp < first_null; kp++)
    notnull[kp]++;
}


/*
  "Ignore NULLs" statistics collection method: process next index tuple.

  SYNOPSIS
    maria_collect_stats_nonulls_next()
      keyseg   IN     Array of key part descriptions
      notnull  INOUT  Array, notnull[i] = (number of {keypart1...keypart_i}
                                           tuples that don't contain NULLs)
      prev_key IN     Previous key values tuple
      last_key IN     Next key values tuple

  DESCRIPTION
    Process the next index tuple:
    1. Find out which prefix tuples of last_key don't contain NULLs, and
       update the array of notnull counters accordingly.
    2. Find the first keypart number where the prev_key and last_key tuples
       are different(A), or last_key has NULL value(B), and return it, so the
       caller can count number of unique tuples for each key prefix. We don't
       need (B) to be counted, and that is compensated back in
       maria_update_key_parts().

  RETURN
    1 + number of first keypart where values differ or last_key tuple has NULL
*/

static
int maria_collect_stats_nonulls_next(HA_KEYSEG *keyseg, ulonglong *notnull,
                                     const uchar *prev_key,
                                     const uchar *last_key)
{
  /*
    Result of the key comparison:
    diff_info[0]= 1 + number of the first keypart where the values are
                  different or either of them is NULL
    diff_info[1]= offset: (last_key + diff_info[1]) points to the first
                  value in last_key that is NULL or different from the
                  corresponding value in prev_key.
  */
  uint diff_info[2];
  uint first_null_seg;
  uint kp= 0;
  HA_KEYSEG *first_diff_seg;

  ha_key_cmp(keyseg, prev_key, last_key, USE_WHOLE_KEY,
             SEARCH_FIND | SEARCH_NULL_ARE_NOT_EQUAL, diff_info);
  first_diff_seg= keyseg + diff_info[0] - 1;

  /* Find first NULL in last_key, searching from the first difference */
  first_null_seg= ha_find_null(first_diff_seg,
                               last_key + diff_info[1]) - keyseg;
  while (kp < first_null_seg)
  {
    notnull[kp]++;
    kp++;
  }

  /*
    Return 1 + number of the first key part where the values differ. It
    does not matter here whether the difference was caused by NULLs; that
    is compensated for in maria_update_key_parts.
  */
  return diff_info[0];
}


804
/* Check if index is ok */
805 806

static int chk_index(HA_CHECK *param, MARIA_HA *info, MARIA_KEYDEF *keyinfo,
unknown's avatar
unknown committed
807
		     my_off_t page, uchar *buff, ha_rows *keys,
808 809 810
		     ha_checksum *key_checksum, uint level)
{
  int flag;
811 812
  uint used_length,comp_flag,page_flag,nod_flag;
  uchar *temp_buff, *keypos, *old_keypos, *endpos;
813
  my_off_t next_page,record;
814
  MARIA_SHARE *share= info->s;
815 816
  char llbuff[22];
  uint diff_pos[2];
817
  uchar tmp_key_buff[MARIA_MAX_KEY_BUFF];
818
  MARIA_KEY tmp_key;
819
  DBUG_ENTER("chk_index");
820
  DBUG_DUMP("buff", buff, _ma_get_page_used(share, buff));
821 822

  /* TODO: implement appropriate check for RTree keys */
823
  if (keyinfo->flag & (HA_SPATIAL | HA_RTREE_INDEX))
824 825
    DBUG_RETURN(0);

unknown's avatar
unknown committed
826
  if (!(temp_buff=(uchar*) my_alloca((uint) keyinfo->block_length)))
827 828 829 830 831 832
  {
    _ma_check_print_error(param,"Not enough memory for keyblock");
    DBUG_RETURN(-1);
  }

  if (keyinfo->flag & HA_NOSAME)
833 834 835 836
  {
    /* Not real duplicates */
    comp_flag=SEARCH_FIND | SEARCH_UPDATE | SEARCH_INSERT;
  }
837 838 839
  else
    comp_flag=SEARCH_SAME;			/* Keys in positionorder */

840 841 842
  page_flag= _ma_get_keypage_flag(share, buff);
  _ma_get_used_and_nod_with_flag(share, page_flag, buff, used_length,
                                 nod_flag);
843 844
  old_keypos= buff + share->keypage_header;
  keypos= old_keypos+ nod_flag;
845 846 847 848
  endpos= buff + used_length;

  param->keydata+=   used_length;
  param->totaldata+= keyinfo->block_length;	/* INFO */
849 850 851 852
  param->key_blocks++;
  if (level > param->max_level)
    param->max_level=level;

853
  if (_ma_get_keynr(share, buff) != (uint) (keyinfo - share->keyinfo))
854 855
    _ma_check_print_error(param, "Page at %s is not marked for index %u",
                          llstr(page, llbuff),
856
                          (uint) (keyinfo - share->keyinfo));
857 858 859 860 861 862 863
  if ((page_flag & KEYPAGE_FLAG_HAS_TRANSID) &&
      !share->base.born_transactional)
  {
    _ma_check_print_error(param,
                          "Page at %s is marked with HAS_TRANSID even if "
                          "table is not transactional",
                          llstr(page, llbuff));
864
  }
865

866
  if (used_length > (uint) keyinfo->block_length - KEYPAGE_CHECKSUM_SIZE)
867
  {
868 869
    _ma_check_print_error(param,"Page at %s has impossible (too big) pagelength",
                          llstr(page,llbuff));
870 871
    goto err;
  }
872 873

  info->last_key.keyinfo= tmp_key.keyinfo= keyinfo;
874
  tmp_key.data= tmp_key_buff;
875 876 877 878 879 880 881 882 883
  for ( ;; )
  {
    if (*_ma_killed_ptr(param))
      goto err;
    if (nod_flag)
    {
      next_page= _ma_kpos(nod_flag,keypos);
      if (chk_index_down(param,info,keyinfo,next_page,
                         temp_buff,keys,key_checksum,level+1))
884 885
      {
        DBUG_DUMP("page_data", old_keypos, (uint) (keypos - old_keypos));
886
	goto err;
887
      }
888 889 890
    }
    old_keypos=keypos;
    if (keypos >= endpos ||
891
	!(*keyinfo->get_key)(&tmp_key, page_flag, nod_flag, &keypos))
892 893 894
      break;
    if (keypos > endpos)
    {
895 896 897
      _ma_check_print_error(param,
                            "Page length and length of keys don't match at "
                            "page: %s",
unknown's avatar
unknown committed
898
                            llstr(page,llbuff));
899 900
      goto err;
    }
901 902 903 904 905 906 907 908 909 910 911
    if (share->data_file_type == BLOCK_RECORD &&
        !(page_flag & KEYPAGE_FLAG_HAS_TRANSID) &&
        key_has_transid(tmp_key.data + tmp_key.data_length +
                        share->rec_reflength-1))
    {
      _ma_check_print_error(param,
                            "Found key marked for transid on page that is not "
                            "marked for transid at: %s",
                            llstr(page,llbuff));
      goto err;
    }
912

913
    if ((*keys)++ &&
914 915 916 917
	(flag=ha_key_cmp(keyinfo->seg, info->last_key.data, tmp_key.data,
                         tmp_key.data_length + tmp_key.ref_length,
                         (comp_flag | SEARCH_INSERT | (tmp_key.flag >> 1) |
                          info->last_key.flag), diff_pos)) >=0)
918
    {
919 920
      DBUG_DUMP_KEY("old", &info->last_key);
      DBUG_DUMP_KEY("new", &tmp_key);
unknown's avatar
unknown committed
921
      DBUG_DUMP("new_in_page", old_keypos, (uint) (keypos-old_keypos));
922

923
      if ((comp_flag & SEARCH_FIND) && flag == 0)
unknown's avatar
unknown committed
924 925
	_ma_check_print_error(param,"Found duplicated key at page %s",
                              llstr(page,llbuff));
926
      else
unknown's avatar
unknown committed
927 928
	_ma_check_print_error(param,"Key in wrong position at page %s",
                              llstr(page,llbuff));
929 930
      goto err;
    }
931

932 933 934 935 936
    if (param->testflag & T_STATISTICS)
    {
      if (*keys != 1L)				/* not first_key */
      {
        if (param->stats_method == MI_STATS_METHOD_NULLS_NOT_EQUAL)
937
          ha_key_cmp(keyinfo->seg, (uchar*) info->last_key.data,
938 939
                     tmp_key.data, tmp_key.data_length,
                     SEARCH_FIND | SEARCH_NULL_ARE_NOT_EQUAL,
940 941 942 943
                     diff_pos);
        else if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS)
        {
          diff_pos[0]= maria_collect_stats_nonulls_next(keyinfo->seg,
unknown's avatar
unknown committed
944
                                                        param->notnull_count,
945 946
                                                        info->last_key.data,
                                                        tmp_key.data);
947 948 949 950 951 952 953
        }
	param->unique_count[diff_pos[0]-1]++;
      }
      else
      {
        if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS)
          maria_collect_stats_nonulls_first(keyinfo->seg, param->notnull_count,
954
                                            tmp_key.data);
955 956
      }
    }
957 958 959 960
    _ma_copy_key(&info->last_key, &tmp_key);
    (*key_checksum)+= maria_byte_checksum(tmp_key.data, tmp_key.data_length);
    record= _ma_row_pos_from_key(&tmp_key);

961 962 963 964
    if (keyinfo->flag & HA_FULLTEXT) /* special handling for ft2 */
    {
      uint off;
      int  subkeys;
965 966
      get_key_full_length_rdonly(off, tmp_key.data);
      subkeys= ft_sintXkorr(tmp_key.data + off);
967 968 969
      if (subkeys < 0)
      {
        ha_rows tmp_keys=0;
970
        if (chk_index_down(param,info,&share->ft2_keyinfo,record,
971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986
                           temp_buff,&tmp_keys,key_checksum,1))
          goto err;
        if (tmp_keys + subkeys)
        {
          _ma_check_print_error(param,
                               "Number of words in the 2nd level tree "
                               "does not match the number in the header. "
                               "Parent word in on the page %s, offset %u",
                               llstr(page,llbuff), (uint) (old_keypos-buff));
          goto err;
        }
        (*keys)+=tmp_keys-1;
        continue;
      }
      /* fall through */
    }
987 988 989 990 991
    if ((share->data_file_type != BLOCK_RECORD &&
         record >= share->state.state.data_file_length) ||
        (share->data_file_type == BLOCK_RECORD &&
         ma_recordpos_to_page(record) * share->base.min_block_length >=
         share->state.state.data_file_length))
992 993 994 995 996 997 998
    {
#ifndef DBUG_OFF
      char llbuff2[22], llbuff3[22];
#endif
      _ma_check_print_error(param,"Found key at page %s that points to record outside datafile",llstr(page,llbuff));
      DBUG_PRINT("test",("page: %s  record: %s  filelength: %s",
			 llstr(page,llbuff),llstr(record,llbuff2),
999
			 llstr(share->state.state.data_file_length,llbuff3)));
1000
      DBUG_DUMP_KEY("key", &tmp_key);
unknown's avatar
unknown committed
1001
      DBUG_DUMP("new_in_page", old_keypos, (uint) (keypos-old_keypos));
1002 1003
      goto err;
    }
unknown's avatar
unknown committed
1004
    param->record_checksum+= (ha_checksum) record;
1005 1006 1007
  }
  if (keypos != endpos)
  {
1008 1009 1010 1011 1012
    _ma_check_print_error(param,
                          "Keyblock size at page %s is not correct. "
                          "Block length: %u  key length: %u",
                          llstr(page, llbuff), used_length,
                          (uint) (keypos - buff));
1013 1014
    goto err;
  }
unknown's avatar
unknown committed
1015
  my_afree((uchar*) temp_buff);
1016 1017
  DBUG_RETURN(0);
 err:
unknown's avatar
unknown committed
1018
  my_afree((uchar*) temp_buff);
1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064
  DBUG_RETURN(1);
} /* chk_index */


	/* Calculate a checksum of 1+2+3+4...N = N*(N+1)/2 without overflow */

static ha_checksum calc_checksum(ha_rows count)
{
  ulonglong total, multiplicand, multiplier;
  DBUG_ENTER("calc_checksum");

  /*
    One of count and count+1 is even; halve that one first so that the
    division by two in N*(N+1)/2 is exact and done before multiplying.
  */
  multiplicand= count;
  multiplier= count+1;
  if (multiplicand & 1)
    multiplier>>= 1;
  else
    multiplicand>>= 1;

  /* Shift-and-add multiplication of the two factors */
  for (total= 0; multiplier; multiplicand<<= 1, multiplier>>= 1)
  {
    if (multiplier & 1)
      total+= multiplicand;
  }

  DBUG_PRINT("exit",("sum: %lx",(ulong) total));
  DBUG_RETURN((ha_checksum) total);
} /* calc_checksum */


	/* Calc length of key in normal isam */

static uint isam_key_length(MARIA_HA *info, register MARIA_KEYDEF *keyinfo)
{
  uint length;
  HA_KEYSEG *keyseg;
  DBUG_ENTER("isam_key_length");

  /* Start with the length of the row reference stored with the key */
  length= info->s->rec_reflength;
  /* Add all key segments; the array ends with a segment of type 0 */
  for (keyseg=keyinfo->seg ; keyseg->type ; keyseg++)
    length+= keyseg->length;

  /* 'length' is unsigned, so it must be printed with %u, not %d */
  DBUG_PRINT("exit",("length: %u",length));
  DBUG_RETURN(length);
} /* isam_key_length */



unknown's avatar
unknown committed
1065 1066
static void record_pos_to_txt(MARIA_HA *info, my_off_t recpos,
                              char *buff)
{
  my_off_t page_no;
  uint dir_entry;
  char *to;

  /* Formats other than BLOCK_RECORD: print the position as one number */
  if (info->s->data_file_type != BLOCK_RECORD)
  {
    llstr(recpos, buff);
    return;
  }

  /* BLOCK_RECORD: print as "page:directory_entry" */
  page_no= ma_recordpos_to_page(recpos);
  dir_entry= ma_recordpos_to_dir_entry(recpos);
  to= longlong10_to_str(page_no, buff, 10);
  *to++= ':';
  longlong10_to_str(dir_entry, to, 10);
}
1079

unknown's avatar
unknown committed
1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096

/*
  Check that keys in records exist in index tree

  SYNOPSIS
  check_keys_in_record()
  param		Check paramenter
  info		Maria handler
  extend	Type of check (extended or normal)
  start_recpos	Position to row
  record	Record buffer

  NOTES
    This function also calculates record checksum & number of rows
*/

static int check_keys_in_record(HA_CHECK *param, MARIA_HA *info, int extend,
unknown's avatar
unknown committed
1097
                                my_off_t start_recpos, uchar *record)
unknown's avatar
unknown committed
1098
{
1099
  MARIA_SHARE *share= info->s;
unknown's avatar
unknown committed
1100 1101
  MARIA_KEYDEF *keyinfo;
  char llbuff[22+4];
1102
  uint keynr;
unknown's avatar
unknown committed
1103 1104 1105 1106

  param->tmp_record_checksum+= (ha_checksum) start_recpos;
  param->records++;
  if (param->testflag & T_WRITE_LOOP && param->records % WRITE_COUNT == 0)
1107
  {
unknown's avatar
unknown committed
1108 1109
    printf("%s\r", llstr(param->records, llbuff));
    VOID(fflush(stdout));
1110
  }
1111

unknown's avatar
unknown committed
1112
  /* Check if keys match the record */
1113 1114
  for (keynr=0, keyinfo= share->keyinfo; keynr < share->base.keys;
       keynr++, keyinfo++)
1115
  {
1116
    if (maria_is_key_active(share->state.key_map, keynr))
1117
    {
1118 1119
      MARIA_KEY key;
      if (!(keyinfo->flag & HA_FULLTEXT))
1120
      {
1121 1122
        (*keyinfo->make_key)(info, &key, keynr, info->lastkey_buff, record,
                             start_recpos, 0);
unknown's avatar
unknown committed
1123 1124 1125 1126 1127 1128 1129
        if (extend)
        {
          /* We don't need to lock the key tree here as we don't allow
             concurrent threads when running maria_chk
          */
          int search_result=
#ifdef HAVE_RTREE_KEYS
1130 1131
            (keyinfo->flag & (HA_SPATIAL | HA_RTREE_INDEX)) ?
            maria_rtree_find_first(info, &key, MBR_EQUAL | MBR_DATA) :
unknown's avatar
unknown committed
1132
#endif
1133
            _ma_search(info, &key, SEARCH_SAME, share->state.key_root[keynr]);
unknown's avatar
unknown committed
1134 1135 1136
          if (search_result)
          {
            record_pos_to_txt(info, start_recpos, llbuff);
unknown's avatar
unknown committed
1137 1138 1139
            _ma_check_print_error(param,
                                  "Record at: %14s  "
                                  "Can't find key for index: %2d",
1140
                                  llbuff, keynr+1);
unknown's avatar
unknown committed
1141 1142 1143 1144 1145
            if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE))
              return -1;
          }
        }
        else
1146 1147
          param->tmp_key_crc[keynr]+=
            maria_byte_checksum(key.data, key.data_length);
1148 1149 1150
      }
    }
  }
unknown's avatar
unknown committed
1151 1152 1153
  return 0;
}

1154

unknown's avatar
unknown committed
1155 1156 1157 1158 1159
/*
  Functions to loop through all rows and check if they are ok

  NOTES
    One function for each record format

  RESULT
    0  ok
    -1 Interrupted by user
    1  Error
*/

static int check_static_record(HA_CHECK *param, MARIA_HA *info, int extend,
unknown's avatar
unknown committed
1168
                               uchar *record)
unknown's avatar
unknown committed
1169
{
1170
  MARIA_SHARE *share= info->s;
unknown's avatar
unknown committed
1171 1172 1173
  my_off_t start_recpos, pos;
  char llbuff[22];

1174
  pos= 0;
1175
  while (pos < share->state.state.data_file_length)
1176 1177
  {
    if (*_ma_killed_ptr(param))
unknown's avatar
unknown committed
1178
      return -1;
unknown's avatar
unknown committed
1179
    if (my_b_read(&param->read_cache,(uchar*) record,
1180
                  share->base.pack_reclength))
unknown's avatar
unknown committed
1181 1182 1183 1184 1185 1186 1187
    {
      _ma_check_print_error(param,
                            "got error: %d when reading datafile at position: %s",
                            my_errno, llstr(pos, llbuff));
      return 1;
    }
    start_recpos= pos;
1188
    pos+= share->base.pack_reclength;
unknown's avatar
unknown committed
1189 1190 1191 1192
    param->splits++;
    if (*record == '\0')
    {
      param->del_blocks++;
1193
      param->del_length+= share->base.pack_reclength;
unknown's avatar
unknown committed
1194 1195 1196
      continue;					/* Record removed */
    }
    param->glob_crc+= _ma_static_checksum(info,record);
1197
    param->used+= share->base.pack_reclength;
unknown's avatar
unknown committed
1198 1199 1200 1201 1202 1203 1204 1205
    if (check_keys_in_record(param, info, extend, start_recpos, record))
      return 1;
  }
  return 0;
}


static int check_dynamic_record(HA_CHECK *param, MARIA_HA *info, int extend,
unknown's avatar
unknown committed
1206
                                uchar *record)
unknown's avatar
unknown committed
1207 1208
{
  MARIA_BLOCK_INFO block_info;
1209
  MARIA_SHARE *share= info->s;
unknown's avatar
unknown committed
1210
  my_off_t start_recpos, start_block, pos;
unknown's avatar
unknown committed
1211
  uchar *to;
unknown's avatar
unknown committed
1212 1213 1214 1215 1216
  ulong left_length;
  uint	b_type;
  char llbuff[22],llbuff2[22],llbuff3[22];
  DBUG_ENTER("check_dynamic_record");

unknown's avatar
unknown committed
1217 1218 1219 1220
  LINT_INIT(left_length);
  LINT_INIT(start_recpos);
  LINT_INIT(to);

1221
  pos= 0;
1222
  while (pos < share->state.state.data_file_length)
unknown's avatar
unknown committed
1223 1224 1225 1226 1227 1228 1229 1230 1231 1232
  {
    my_bool got_error= 0;
    int flag;
    if (*_ma_killed_ptr(param))
      DBUG_RETURN(-1);

    flag= block_info.second_read=0;
    block_info.next_filepos=pos;
    do
    {
unknown's avatar
unknown committed
1233
      if (_ma_read_cache(&param->read_cache,(uchar*) block_info.header,
unknown's avatar
unknown committed
1234 1235 1236
                         (start_block=block_info.next_filepos),
                         sizeof(block_info.header),
                         (flag ? 0 : READING_NEXT) | READING_HEADER))
1237
      {
unknown's avatar
unknown committed
1238
        _ma_check_print_error(param,
unknown's avatar
unknown committed
1239 1240
                              "got error: %d when reading datafile at "
                              "position: %s",
unknown's avatar
unknown committed
1241 1242
                              my_errno, llstr(start_block, llbuff));
        DBUG_RETURN(1);
1243
      }
unknown's avatar
unknown committed
1244 1245

      if (start_block & (MARIA_DYN_ALIGN_SIZE-1))
1246
      {
unknown's avatar
unknown committed
1247 1248 1249 1250 1251 1252 1253
        _ma_check_print_error(param,"Wrong aligned block at %s",
                              llstr(start_block,llbuff));
        DBUG_RETURN(1);
      }
      b_type= _ma_get_block_info(&block_info,-1,start_block);
      if (b_type & (BLOCK_DELETED | BLOCK_ERROR | BLOCK_SYNC_ERROR |
                    BLOCK_FATAL_ERROR))
1254
      {
unknown's avatar
unknown committed
1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268
        if (b_type & BLOCK_SYNC_ERROR)
        {
          if (flag)
          {
            _ma_check_print_error(param,"Unexpected byte: %d at link: %s",
                                  (int) block_info.header[0],
                                  llstr(start_block,llbuff));
            DBUG_RETURN(1);
          }
          pos=block_info.filepos+block_info.block_len;
          goto next;
        }
        if (b_type & BLOCK_DELETED)
        {
1269
          if (block_info.block_len < share->base.min_block_length)
unknown's avatar
unknown committed
1270 1271 1272 1273 1274 1275 1276
          {
            _ma_check_print_error(param,
                                  "Deleted block with impossible length %lu at %s",
                                  block_info.block_len,llstr(pos,llbuff));
            DBUG_RETURN(1);
          }
          if ((block_info.next_filepos != HA_OFFSET_ERROR &&
1277
               block_info.next_filepos >= share->state.state.data_file_length) ||
unknown's avatar
unknown committed
1278
              (block_info.prev_filepos != HA_OFFSET_ERROR &&
1279
               block_info.prev_filepos >= share->state.state.data_file_length))
unknown's avatar
unknown committed
1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295
          {
            _ma_check_print_error(param,"Delete link points outside datafile at %s",
                                  llstr(pos,llbuff));
            DBUG_RETURN(1);
          }
          param->del_blocks++;
          param->del_length+= block_info.block_len;
          param->splits++;
          pos= block_info.filepos+block_info.block_len;
          goto next;
        }
        _ma_check_print_error(param,"Wrong bytesec: %d-%d-%d at linkstart: %s",
                              block_info.header[0],block_info.header[1],
                              block_info.header[2],
                              llstr(start_block,llbuff));
        DBUG_RETURN(1);
1296
      }
1297
      if (share->state.state.data_file_length < block_info.filepos+
unknown's avatar
unknown committed
1298
          block_info.block_len)
1299
      {
unknown's avatar
unknown committed
1300 1301 1302 1303 1304
        _ma_check_print_error(param,
                              "Recordlink that points outside datafile at %s",
                              llstr(pos,llbuff));
        got_error=1;
        break;
1305
      }
unknown's avatar
unknown committed
1306 1307
      param->splits++;
      if (!flag++)				/* First block */
1308
      {
unknown's avatar
unknown committed
1309 1310
        start_recpos=pos;
        pos=block_info.filepos+block_info.block_len;
1311
        if (block_info.rec_len > (uint) share->base.max_pack_length)
unknown's avatar
unknown committed
1312 1313 1314 1315 1316 1317 1318
        {
          _ma_check_print_error(param,"Found too long record (%lu) at %s",
                                (ulong) block_info.rec_len,
                                llstr(start_recpos,llbuff));
          got_error=1;
          break;
        }
1319
        if (share->base.blobs)
unknown's avatar
unknown committed
1320 1321 1322
        {
          if (_ma_alloc_buffer(&info->rec_buff, &info->rec_buff_size,
                               block_info.rec_len +
1323
                               share->base.extra_rec_buff_size))
1324

unknown's avatar
unknown committed
1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344
          {
            _ma_check_print_error(param,
                                  "Not enough memory (%lu) for blob at %s",
                                  (ulong) block_info.rec_len,
                                  llstr(start_recpos,llbuff));
            got_error=1;
            break;
          }
        }
        to= info->rec_buff;
        left_length= block_info.rec_len;
      }
      if (left_length < block_info.data_len)
      {
        _ma_check_print_error(param,"Found too long record (%lu) at %s",
                              (ulong) block_info.data_len,
                              llstr(start_recpos,llbuff));
        got_error=1;
        break;
      }
unknown's avatar
unknown committed
1345
      if (_ma_read_cache(&param->read_cache,(uchar*) to,block_info.filepos,
unknown's avatar
unknown committed
1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370
                         (uint) block_info.data_len,
                         flag == 1 ? READING_NEXT : 0))
      {
        _ma_check_print_error(param,
                              "got error: %d when reading datafile at position: %s", my_errno, llstr(block_info.filepos, llbuff));

        DBUG_RETURN(1);
      }
      to+=block_info.data_len;
      param->link_used+= block_info.filepos-start_block;
      param->used+= block_info.filepos - start_block + block_info.data_len;
      param->empty+= block_info.block_len-block_info.data_len;
      left_length-= block_info.data_len;
      if (left_length)
      {
        if (b_type & BLOCK_LAST)
        {
          _ma_check_print_error(param,
                                "Wrong record length %s of %s at %s",
                                llstr(block_info.rec_len-left_length,llbuff),
                                llstr(block_info.rec_len, llbuff2),
                                llstr(start_recpos,llbuff3));
          got_error=1;
          break;
        }
1371
        if (share->state.state.data_file_length < block_info.next_filepos)
unknown's avatar
unknown committed
1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389
        {
          _ma_check_print_error(param,
                                "Found next-recordlink that points outside datafile at %s",
                                llstr(block_info.filepos,llbuff));
          got_error=1;
          break;
        }
      }
    } while (left_length);

    if (! got_error)
    {
      if (_ma_rec_unpack(info,record,info->rec_buff,block_info.rec_len) ==
          MY_FILE_ERROR)
      {
        _ma_check_print_error(param,"Found wrong record at %s",
                              llstr(start_recpos,llbuff));
        got_error=1;
1390 1391
      }
      else
unknown's avatar
unknown committed
1392
      {
1393
        ha_checksum checksum= 0;
1394 1395
        if (share->calc_checksum)
          checksum= (*share->calc_checksum)(info, record);
1396

unknown's avatar
unknown committed
1397 1398 1399
        if (param->testflag & (T_EXTEND | T_MEDIUM | T_VERBOSE))
        {
          if (_ma_rec_check(info,record, info->rec_buff,block_info.rec_len,
1400
                            test(share->calc_checksum), checksum))
unknown's avatar
unknown committed
1401 1402 1403 1404 1405 1406
          {
            _ma_check_print_error(param,"Found wrong packed record at %s",
                                  llstr(start_recpos,llbuff));
            got_error= 1;
          }
        }
1407
        param->glob_crc+= checksum;
unknown's avatar
unknown committed
1408
      }
1409

unknown's avatar
unknown committed
1410 1411 1412 1413
      if (! got_error)
      {
        if (check_keys_in_record(param, info, extend, start_recpos, record))
          DBUG_RETURN(1);
1414 1415
      }
      else
unknown's avatar
unknown committed
1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429
      {
        if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE))
          DBUG_RETURN(1);
      }
    }
    else if (!flag)
      pos= block_info.filepos+block_info.block_len;
next:;
  }
  DBUG_RETURN(0);
}


static int check_compressed_record(HA_CHECK *param, MARIA_HA *info, int extend,
unknown's avatar
unknown committed
1430
                                   uchar *record)
unknown's avatar
unknown committed
1431
{
1432 1433
  MARIA_BLOCK_INFO block_info;
  MARIA_SHARE *share= info->s;
unknown's avatar
unknown committed
1434 1435
  my_off_t start_recpos, pos;
  char llbuff[22];
1436
  my_bool got_error= 0;
unknown's avatar
unknown committed
1437 1438
  DBUG_ENTER("check_compressed_record");

1439
  pos= share->pack.header_length;             /* Skip header */
1440
  while (pos < share->state.state.data_file_length)
unknown's avatar
unknown committed
1441 1442 1443 1444
  {
    if (*_ma_killed_ptr(param))
      DBUG_RETURN(-1);

unknown's avatar
unknown committed
1445
    if (_ma_read_cache(&param->read_cache,(uchar*) block_info.header, pos,
1446
                       share->pack.ref_length, READING_NEXT))
unknown's avatar
unknown committed
1447 1448 1449 1450 1451 1452 1453 1454 1455
    {
      _ma_check_print_error(param,
                            "got error: %d when reading datafile at position: %s",
                            my_errno, llstr(pos, llbuff));
      DBUG_RETURN(1);
    }

    start_recpos= pos;
    param->splits++;
unknown's avatar
unknown committed
1456
    VOID(_ma_pack_get_block_info(info, &info->bit_buff, &block_info,
unknown's avatar
unknown committed
1457 1458
                                 &info->rec_buff, &info->rec_buff_size, -1,
                                 start_recpos));
unknown's avatar
unknown committed
1459
    pos=block_info.filepos+block_info.rec_len;
1460 1461
    if (block_info.rec_len < (uint) share->min_pack_length ||
        block_info.rec_len > (uint) share->max_pack_length)
unknown's avatar
unknown committed
1462 1463
    {
      _ma_check_print_error(param,
1464
                            "Found block with wrong recordlength: %lu at %s",
unknown's avatar
unknown committed
1465 1466 1467 1468
                            block_info.rec_len, llstr(start_recpos,llbuff));
      got_error=1;
      goto end;
    }
unknown's avatar
unknown committed
1469
    if (_ma_read_cache(&param->read_cache,(uchar*) info->rec_buff,
unknown's avatar
unknown committed
1470 1471 1472 1473 1474 1475 1476
                       block_info.filepos, block_info.rec_len, READING_NEXT))
    {
      _ma_check_print_error(param,
                            "got error: %d when reading datafile at position: %s",
                            my_errno, llstr(block_info.filepos, llbuff));
      DBUG_RETURN(1);
    }
unknown's avatar
unknown committed
1477 1478
    if (_ma_pack_rec_unpack(info, &info->bit_buff, record,
                            info->rec_buff, block_info.rec_len))
unknown's avatar
unknown committed
1479 1480 1481 1482 1483 1484
    {
      _ma_check_print_error(param,"Found wrong record at %s",
                            llstr(start_recpos,llbuff));
      got_error=1;
      goto end;
    }
1485
    param->glob_crc+= (*share->calc_checksum)(info,record);
unknown's avatar
unknown committed
1486 1487 1488 1489
    param->link_used+= (block_info.filepos - start_recpos);
    param->used+= (pos-start_recpos);

end:
1490 1491
    if (! got_error)
    {
unknown's avatar
unknown committed
1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506
      if (check_keys_in_record(param, info, extend, start_recpos, record))
        DBUG_RETURN(1);
    }
    else
    {
      got_error= 0;                             /* Reset for next loop */
      if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE))
        DBUG_RETURN(1);
    }
  }
  DBUG_RETURN(0);
}


/*
  Check if layout on head or tail page is ok

  NOTES
    This is for rows-in-block format.
*/

static int check_page_layout(HA_CHECK *param, MARIA_HA *info,
unknown's avatar
unknown committed
1514
                             my_off_t page_pos, uchar *page,
unknown's avatar
unknown committed
1515
                             uint row_count, uint head_empty,
1516
                             uint *real_rows_found, uint *free_slots_found)
unknown's avatar
unknown committed
1517
{
1518 1519
  uint empty, last_row_end, row, first_dir_entry, free_entry, block_size;
  uint free_entries, prev_free_entry;
unknown's avatar
unknown committed
1520
  uchar *dir_entry;
unknown's avatar
unknown committed
1521
  char llbuff[22];
1522
  my_bool error_in_free_list= 0;
unknown's avatar
unknown committed
1523 1524
  DBUG_ENTER("check_page_layout");

1525
  block_size= info->s->block_size;
unknown's avatar
unknown committed
1526 1527 1528 1529
  empty= 0;
  last_row_end= PAGE_HEADER_SIZE;
  *real_rows_found= 0;

1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568
  /* Check free directory list */
  free_entry= (uint) page[DIR_FREE_OFFSET];
  free_entries= 0;
  prev_free_entry= END_OF_DIR_FREE_LIST;
  while (free_entry != END_OF_DIR_FREE_LIST)
  {
    uchar *dir;
    if (free_entry > row_count)
    {
      _ma_check_print_error(param,
                            "Page %9s:  Directory free entry points outside "
                            "directory",
                            llstr(page_pos, llbuff));
      error_in_free_list= 1;
      break;
    }
    dir= dir_entry_pos(page, block_size, free_entry);
    if (uint2korr(dir) != 0)
    {
      _ma_check_print_error(param,
                            "Page %9s:  Directory free entry points to "
                            "not deleted entry",
                            llstr(page_pos, llbuff));
      error_in_free_list= 1;
      break;
    }
    if (dir[2] != prev_free_entry)
    {
      _ma_check_print_error(param,
                            "Page %9s:  Directory free list back pointer "
                            "points to wrong entry",
                            llstr(page_pos, llbuff));
      error_in_free_list= 1;
      break;
    }
    prev_free_entry= free_entry;
    free_entry= dir[3];
    free_entries++;
  }
1569
  *free_slots_found= free_entries;
1570 1571

  /* Check directry */
1572 1573
  dir_entry= page+ block_size - PAGE_SUFFIX_SIZE;
  first_dir_entry= (block_size - row_count * DIR_ENTRY_SIZE -
1574
                    PAGE_SUFFIX_SIZE);
unknown's avatar
unknown committed
1575 1576 1577 1578 1579 1580 1581
  for (row= 0 ; row < row_count ; row++)
  {
    uint pos, length;
    dir_entry-= DIR_ENTRY_SIZE;
    pos= uint2korr(dir_entry);
    if (!pos)
    {
1582
      free_entries--;
unknown's avatar
unknown committed
1583
      if (row == row_count -1)
1584
      {
unknown's avatar
unknown committed
1585 1586 1587 1588 1589
        _ma_check_print_error(param,
                              "Page %9s:  First entry in directory is 0",
                              llstr(page_pos, llbuff));
        if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE))
          DBUG_RETURN(1);
1590
      }
unknown's avatar
unknown committed
1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619
      continue;                                 /* Deleted row */
    }
    (*real_rows_found)++;
    length= uint2korr(dir_entry+2);
    param->used+= length;
    if (pos < last_row_end)
    {
      _ma_check_print_error(param,
                            "Page %9s:  Row %3u overlapps with previous row",
                            llstr(page_pos, llbuff), row);
      DBUG_RETURN(1);
    }
    empty+= (pos - last_row_end);
    last_row_end= pos + length;
    if (last_row_end > first_dir_entry)
    {
      _ma_check_print_error(param,
                            "Page %9s:  Row %3u overlapps with directory",
                            llstr(page_pos, llbuff), row);
      DBUG_RETURN(1);
    }
  }
  empty+= (first_dir_entry - last_row_end);

  if (empty != head_empty)
  {
    _ma_check_print_error(param,
                          "Page %9s:  Wrong empty size.  Stored: %5u  Actual: %5u",
                          llstr(page_pos, llbuff), head_empty, empty);
1620
    param->err_count++;
unknown's avatar
unknown committed
1621
  }
1622 1623 1624 1625 1626 1627 1628 1629 1630 1631
  if (free_entries != 0 && !error_in_free_list)
  {
    _ma_check_print_error(param,
                          "Page %9s:  Directory free link don't include "
                          "all free entries",
                          llstr(page_pos, llbuff));
    param->err_count++;
  }
  DBUG_RETURN(param->err_count &&
              (param->err_count >= MAXERR || !(param->testflag & T_VERBOSE)));
unknown's avatar
unknown committed
1632 1633 1634 1635 1636 1637 1638
}


/*
  Check all rows on head page

  NOTES
1639 1640
    This is for rows-in-block format.

unknown's avatar
unknown committed
1641 1642 1643 1644 1645 1646 1647
    Before this, we have already called check_page_layout(), so
    we know the block is logicaly correct (even if the rows may not be that)

  RETURN
   0  ok
   1  error
*/
1648 1649


unknown's avatar
unknown committed
1650 1651
static my_bool check_head_page(HA_CHECK *param, MARIA_HA *info, uchar *record,
                               int extend, my_off_t page_pos, uchar *page_buff,
unknown's avatar
unknown committed
1652 1653
                               uint row_count)
{
1654
  MARIA_SHARE *share= info->s;
unknown's avatar
unknown committed
1655
  uchar *dir_entry;
unknown's avatar
unknown committed
1656 1657
  uint row;
  char llbuff[22], llbuff2[22];
unknown's avatar
unknown committed
1658
  ulonglong page= page_pos / share->block_size;
unknown's avatar
unknown committed
1659 1660
  DBUG_ENTER("check_head_page");

1661
  dir_entry= page_buff+ share->block_size - PAGE_SUFFIX_SIZE;
unknown's avatar
unknown committed
1662 1663 1664 1665 1666 1667 1668 1669
  for (row= 0 ; row < row_count ; row++)
  {
    uint pos, length, flag;
    dir_entry-= DIR_ENTRY_SIZE;
    pos= uint2korr(dir_entry);
    if (!pos)
      continue;
    length= uint2korr(dir_entry+2);
1670
    if (length < share->base.min_block_length)
unknown's avatar
unknown committed
1671 1672
    {
      _ma_check_print_error(param,
unknown's avatar
unknown committed
1673 1674 1675 1676
                            "Page %9s:  Row %3u is too short "
                            "(%d of min %d bytes)",
                            llstr(page, llbuff), row, length,
                            (uint) share->base.min_block_length);
unknown's avatar
unknown committed
1677 1678 1679 1680 1681
      DBUG_RETURN(1);
    }
    flag= (uint) (uchar) page_buff[pos];
    if (flag & ~(ROW_FLAG_ALL))
      _ma_check_print_error(param,
unknown's avatar
unknown committed
1682
                            "Page %9s: Row %3u has wrong flag: %u",
unknown's avatar
unknown committed
1683
                            llstr(page, llbuff), row, flag);
unknown's avatar
unknown committed
1684 1685

    DBUG_PRINT("info", ("rowid: %s  page: %lu  row: %u",
unknown's avatar
unknown committed
1686 1687
                        llstr(ma_recordpos(page, row), llbuff),
                        (ulong) page, row));
unknown's avatar
unknown committed
1688
    info->cur_row.trid= 0;
unknown's avatar
unknown committed
1689 1690 1691 1692 1693
    if (_ma_read_block_record2(info, record, page_buff+pos,
                               page_buff+pos+length))
    {
      _ma_check_print_error(param,
                            "Page %9s:  Row %3d is crashed",
unknown's avatar
unknown committed
1694
                            llstr(page, llbuff), row);
unknown's avatar
unknown committed
1695 1696 1697 1698
      if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE))
        DBUG_RETURN(1);
      continue;
    }
unknown's avatar
unknown committed
1699 1700 1701 1702
    set_if_bigger(param->max_found_trid, info->cur_row.trid);
    if (info->cur_row.trid > param->max_trid)
      _ma_check_print_not_visible_error(param, info->cur_row.trid);

1703
    if (share->calc_checksum)
unknown's avatar
unknown committed
1704
    {
1705
      ha_checksum checksum= (*share->calc_checksum)(info, record);
1706 1707
      if (info->cur_row.checksum != (checksum & 255))
        _ma_check_print_error(param, "Page %9s:  Row %3d has wrong checksum",
unknown's avatar
unknown committed
1708
                              llstr(page, llbuff), row);
1709
      param->glob_crc+= checksum;
unknown's avatar
unknown committed
1710 1711 1712
    }
    if (info->cur_row.extents_count)
    {
unknown's avatar
unknown committed
1713
      uchar *extents= info->cur_row.extents;
unknown's avatar
unknown committed
1714 1715 1716
      uint i;
      /* Check that bitmap has the right marker for the found extents */
      for (i= 0 ; i < info->cur_row.extents_count ; i++)
1717
      {
1718 1719
        pgcache_page_no_t extent_page;
        uint page_count, page_type;
unknown's avatar
unknown committed
1720
        extent_page= uint5korr(extents);
1721
        page_count=  uint2korr(extents+5) & ~START_EXTENT_BIT;
unknown's avatar
unknown committed
1722 1723 1724 1725 1726 1727 1728
        extents+=    ROW_EXTENT_SIZE;
        page_type=   BLOB_PAGE;
        if (page_count & TAIL_BIT)
        {
          page_count= 1;
          page_type= TAIL_PAGE;
        }
1729 1730 1731 1732 1733
        /*
          TODO OPTIMIZE:
          Check the whole extent with one test and only do the loop if
          something is wrong (for exact error reporting)
        */
unknown's avatar
unknown committed
1734
        for ( ; page_count--; extent_page++)
unknown's avatar
unknown committed
1735 1736
        {
          uint bitmap_pattern;
unknown's avatar
unknown committed
1737
          if (_ma_check_if_right_bitmap_type(info, page_type, extent_page,
unknown's avatar
unknown committed
1738 1739 1740 1741
                                             &bitmap_pattern))
          {
            _ma_check_print_error(param,
                                  "Page %9s:  Row: %3d has an extent with wrong information in bitmap:  Page %9s  Page_type: %d  Bitmap: %d",
unknown's avatar
unknown committed
1742 1743 1744
                                  llstr(page, llbuff), row,
                                  llstr(extent_page, llbuff2),
                                  page_type, bitmap_pattern);
unknown's avatar
unknown committed
1745 1746 1747 1748
            if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE))
              DBUG_RETURN(1);
          }
        }
1749 1750
      }
    }
unknown's avatar
unknown committed
1751 1752 1753
    param->full_page_count+= info->cur_row.full_page_count;
    param->tail_count+= info->cur_row.tail_count;
    if (check_keys_in_record(param, info, extend,
unknown's avatar
unknown committed
1754
                             ma_recordpos(page, row), record))
unknown's avatar
unknown committed
1755 1756 1757 1758 1759 1760
      DBUG_RETURN(1);
  }
  DBUG_RETURN(0);
}


1761 1762 1763
/*
  Check if rows-in-block data file is consistent
*/
unknown's avatar
unknown committed
1764 1765

static int check_block_record(HA_CHECK *param, MARIA_HA *info, int extend,
unknown's avatar
unknown committed
1766
                              uchar *record)
unknown's avatar
unknown committed
1767
{
1768
  MARIA_SHARE *share= info->s;
unknown's avatar
unknown committed
1769
  my_off_t pos;
1770
  pgcache_page_no_t page;
unknown's avatar
unknown committed
1771
  uchar *page_buff, *bitmap_buff, *data;
unknown's avatar
unknown committed
1772
  char llbuff[22], llbuff2[22];
1773
  uint block_size= share->block_size;
unknown's avatar
unknown committed
1774 1775
  ha_rows full_page_count, tail_count;
  my_bool full_dir;
1776
  uint offset_page, offset, free_count;
1777

1778 1779
  LINT_INIT(full_dir);

unknown's avatar
unknown committed
1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790
  if (_ma_scan_init_block_record(info))
  {
    _ma_check_print_error(param, "got error %d when initializing scan",
                          my_errno);
    return 1;
  }
  bitmap_buff= info->scan.bitmap_buff;
  page_buff= info->scan.page_buff;
  full_page_count= tail_count= 0;
  param->full_page_count= param->tail_count= 0;
  param->used= param->link_used= 0;
1791
  param->splits= share->state.state.data_file_length / block_size;
1792

unknown's avatar
unknown committed
1793
  for (pos= 0, page= 0;
1794
       pos < share->state.state.data_file_length;
unknown's avatar
unknown committed
1795
       pos+= block_size, page++)
unknown's avatar
unknown committed
1796 1797 1798 1799 1800 1801 1802 1803 1804 1805
  {
    uint row_count, real_row_count, empty_space, page_type, bitmap_pattern;
    LINT_INIT(row_count);
    LINT_INIT(empty_space);

    if (*_ma_killed_ptr(param))
    {
      _ma_scan_end_block_record(info);
      return -1;
    }
unknown's avatar
unknown committed
1806
    if ((page % share->bitmap.pages_covered) == 0)
unknown's avatar
unknown committed
1807 1808
    {
      /* Bitmap page */
1809
      if (pagecache_read(share->pagecache,
1810
                         &info->s->bitmap.file,
unknown's avatar
unknown committed
1811
                         page, 1,
unknown's avatar
unknown committed
1812 1813 1814
                         bitmap_buff,
                         PAGECACHE_PLAIN_PAGE,
                         PAGECACHE_LOCK_LEFT_UNLOCKED, 0) == 0)
1815
      {
unknown's avatar
unknown committed
1816 1817
        _ma_check_print_error(param,
                              "Page %9s:  Got error: %d when reading datafile",
unknown's avatar
unknown committed
1818
                              llstr(page, llbuff), my_errno);
unknown's avatar
unknown committed
1819
        goto err;
1820
      }
unknown's avatar
unknown committed
1821 1822 1823
      param->used+= block_size;
      param->link_used+= block_size;
      continue;
1824
    }
unknown's avatar
unknown committed
1825
    /* Skip pages marked as empty in bitmap */
1826
    offset_page= (uint) ((page % share->bitmap.pages_covered) -1) * 3;
unknown's avatar
unknown committed
1827 1828 1829 1830
    offset= offset_page & 7;
    data= bitmap_buff + offset_page / 8;
    bitmap_pattern= uint2korr(data);
    if (!((bitmap_pattern >> offset) & 7))
1831
    {
unknown's avatar
unknown committed
1832 1833 1834 1835 1836
      param->empty+= block_size;
      param->del_blocks++;
      continue;
    }

1837
    if (pagecache_read(share->pagecache,
unknown's avatar
unknown committed
1838
                       &info->dfile,
unknown's avatar
unknown committed
1839
                       page, 1,
unknown's avatar
unknown committed
1840
                       page_buff,
1841
                       share->page_type,
unknown's avatar
unknown committed
1842
                       PAGECACHE_LOCK_LEFT_UNLOCKED, 0) == 0)
unknown's avatar
unknown committed
1843 1844 1845
    {
      _ma_check_print_error(param,
                            "Page %9s:  Got error: %d when reading datafile",
unknown's avatar
unknown committed
1846
                            llstr(page, llbuff), my_errno);
unknown's avatar
unknown committed
1847 1848 1849 1850 1851 1852
      goto err;
    }
    page_type= page_buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK;
    if (page_type == UNALLOCATED_PAGE || page_type >= MAX_PAGE_TYPE)
    {
      _ma_check_print_error(param,
unknown's avatar
unknown committed
1853 1854
                            "Page: %9s  Found wrong page type %d",
                            llstr(page, llbuff), page_type);
unknown's avatar
unknown committed
1855 1856
      if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE))
        goto err;
1857
      continue;
unknown's avatar
unknown committed
1858 1859 1860 1861
    }
    switch ((enum en_page_type) page_type) {
    case UNALLOCATED_PAGE:
    case MAX_PAGE_TYPE:
1862
    default:
1863
      DBUG_ASSERT(0);                           /* Impossible */
unknown's avatar
unknown committed
1864 1865
      break;
    case HEAD_PAGE:
1866
      row_count= ((uchar*) page_buff)[DIR_COUNT_OFFSET];
unknown's avatar
unknown committed
1867
      empty_space= uint2korr(page_buff + EMPTY_SPACE_OFFSET);
unknown's avatar
unknown committed
1868
      param->used+= block_size - empty_space;
unknown's avatar
unknown committed
1869 1870
      param->link_used+= (PAGE_HEADER_SIZE + PAGE_SUFFIX_SIZE +
                          row_count * DIR_ENTRY_SIZE);
unknown's avatar
unknown committed
1871 1872
      if (empty_space < share->bitmap.sizes[3])
        param->lost+= empty_space;
1873 1874 1875 1876 1877
      if (check_page_layout(param, info, pos, page_buff, row_count,
                            empty_space, &real_row_count, &free_count))
        goto err;
      full_dir= (row_count == MAX_ROWS_PER_PAGE &&
                 page_buff[DIR_FREE_OFFSET] == END_OF_DIR_FREE_LIST);
unknown's avatar
unknown committed
1878 1879
      break;
    case TAIL_PAGE:
1880
      row_count= ((uchar*) page_buff)[DIR_COUNT_OFFSET];
unknown's avatar
unknown committed
1881
      empty_space= uint2korr(page_buff + EMPTY_SPACE_OFFSET);
unknown's avatar
unknown committed
1882
      param->used+= block_size - empty_space;
unknown's avatar
unknown committed
1883 1884
      param->link_used+= (PAGE_HEADER_SIZE + PAGE_SUFFIX_SIZE +
                          row_count * DIR_ENTRY_SIZE);
unknown's avatar
unknown committed
1885 1886
      if (empty_space < share->bitmap.sizes[6])
        param->lost+= empty_space;
1887 1888 1889 1890 1891
      if (check_page_layout(param, info, pos, page_buff, row_count,
                            empty_space, &real_row_count, &free_count))
        goto err;
      full_dir= (row_count - free_count >= MAX_ROWS_PER_PAGE -
                 share->base.blobs);
unknown's avatar
unknown committed
1892 1893 1894 1895 1896 1897 1898 1899
      break;
    case BLOB_PAGE:
      full_page_count++;
      full_dir= 0;
      empty_space= block_size;                  /* for error reporting */
      param->link_used+= (LSN_SIZE + PAGE_TYPE_SIZE);
      param->used+= block_size;
      break;
1900
    }
unknown's avatar
unknown committed
1901
    if (_ma_check_bitmap_data(info, page_type, page,
unknown's avatar
unknown committed
1902 1903 1904
                              full_dir ? 0 : empty_space,
                              &bitmap_pattern))
    {
1905 1906
      if (bitmap_pattern == ~(uint) 0)
        _ma_check_print_error(param,
unknown's avatar
unknown committed
1907 1908
                              "Page %9s: Wrong bitmap for data on page",
                              llstr(page, llbuff));
1909
      else
1910 1911
        _ma_check_print_error(param,
                              "Page %9s:  Wrong data in bitmap.  Page_type: %d  empty_space: %u  Bitmap-bits: %d",
unknown's avatar
unknown committed
1912 1913
                              llstr(page, llbuff), page_type,
                              empty_space, bitmap_pattern);
unknown's avatar
unknown committed
1914 1915 1916 1917 1918 1919 1920
      if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE))
        goto err;
    }
    if ((enum en_page_type) page_type == BLOB_PAGE)
      continue;
    param->empty+= empty_space;
    if ((enum en_page_type) page_type == TAIL_PAGE)
1921
    {
unknown's avatar
unknown committed
1922 1923
      tail_count+= real_row_count;
      continue;
1924
    }
unknown's avatar
unknown committed
1925 1926 1927
    if (check_head_page(param, info, record, extend, pos, page_buff,
                        row_count))
      goto err;
1928
  }
unknown's avatar
unknown committed
1929

1930 1931
  /* Verify that rest of bitmap is zero */

unknown's avatar
unknown committed
1932
  if (page % share->bitmap.pages_covered)
1933 1934 1935
  {
    /* Not at end of bitmap */
    uint bitmap_pattern;
1936
    offset_page= (uint) ((page % share->bitmap.pages_covered) -1) * 3;
1937 1938 1939 1940
    offset= offset_page & 7;
    data= bitmap_buff + offset_page / 8;
    bitmap_pattern= uint2korr(data);
    if (((bitmap_pattern >> offset)) ||
1941 1942
        (data + 2 < bitmap_buff + share->bitmap.total_size &&
         _ma_check_if_zero(data+2, bitmap_buff + share->bitmap.total_size -
1943 1944 1945
                           data - 2)))
    {
      ulonglong bitmap_page;
unknown's avatar
unknown committed
1946
      bitmap_page= page / share->bitmap.pages_covered;
1947
      bitmap_page*= share->bitmap.pages_covered;
1948

unknown's avatar
unknown committed
1949 1950 1951
      _ma_check_print_error(param,
                            "Bitmap at page %s has pages reserved outside of "
                            "data file length",
1952
                            llstr(bitmap_page, llbuff));
1953
      DBUG_EXECUTE("bitmap", _ma_print_bitmap(&share->bitmap, bitmap_buff,
1954 1955 1956 1957
                                              bitmap_page););
    }
  }

unknown's avatar
unknown committed
1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976
  _ma_scan_end_block_record(info);

  if (full_page_count != param->full_page_count)
    _ma_check_print_error(param, "Full page count read through records was %s but we found %s pages while scanning table",
                          llstr(param->full_page_count, llbuff),
                          llstr(full_page_count, llbuff2));
  if (tail_count != param->tail_count)
    _ma_check_print_error(param, "Tail count read through records was %s but we found %s tails while scanning table",
                          llstr(param->tail_count, llbuff),
                          llstr(tail_count, llbuff2));

  return param->error_printed != 0;

err:
  _ma_scan_end_block_record(info);
  return 1;
}


1977
/* Check that record-link is ok */
unknown's avatar
unknown committed
1978

1979
int maria_chk_data_link(HA_CHECK *param, MARIA_HA *info, my_bool extend)
unknown's avatar
unknown committed
1980
{
1981
  MARIA_SHARE *share= info->s;
unknown's avatar
unknown committed
1982
  int	error;
unknown's avatar
unknown committed
1983
  uchar *record;
unknown's avatar
unknown committed
1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994
  char llbuff[22],llbuff2[22],llbuff3[22];
  DBUG_ENTER("maria_chk_data_link");

  if (!(param->testflag & T_SILENT))
  {
    if (extend)
      puts("- check records and index references");
    else
      puts("- check record links");
  }

1995
  if (!(record= (uchar*) my_malloc(share->base.default_rec_buff_size, MYF(0))))
unknown's avatar
unknown committed
1996 1997 1998 1999 2000 2001
  {
    _ma_check_print_error(param,"Not enough memory for record");
    DBUG_RETURN(-1);
  }
  param->records= param->del_blocks= 0;
  param->used= param->link_used= param->splits= param->del_length= 0;
unknown's avatar
unknown committed
2002
  param->lost= 0;
unknown's avatar
unknown committed
2003 2004
  param->tmp_record_checksum= param->glob_crc= 0;
  param->err_count= 0;
unknown's avatar
unknown committed
2005

unknown's avatar
unknown committed
2006
  error= 0;
2007
  param->empty= share->pack.header_length;
unknown's avatar
unknown committed
2008 2009

  bzero((char*) param->tmp_key_crc,
2010
        share->base.keys * sizeof(param->tmp_key_crc[0]));
2011

2012
  switch (share->data_file_type) {
unknown's avatar
unknown committed
2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029
  case BLOCK_RECORD:
    error= check_block_record(param, info, extend, record);
    break;
  case STATIC_RECORD:
    error= check_static_record(param, info, extend, record);
    break;
  case DYNAMIC_RECORD:
    error= check_dynamic_record(param, info, extend, record);
    break;
  case COMPRESSED_RECORD:
    error= check_compressed_record(param, info, extend, record);
    break;
  } /* switch */

  if (error)
    goto err;

2030 2031 2032 2033
  if (param->testflag & T_WRITE_LOOP)
  {
    VOID(fputs("          \r",stdout)); VOID(fflush(stdout));
  }
2034
  if (param->records != share->state.state.records)
2035
  {
unknown's avatar
unknown committed
2036 2037 2038
    _ma_check_print_error(param,
                          "Record-count is not ok; found %-10s  Should be: %s",
                          llstr(param->records,llbuff),
2039
                          llstr(share->state.state.records,llbuff2));
2040 2041 2042
    error=1;
  }
  else if (param->record_checksum &&
unknown's avatar
unknown committed
2043
	   param->record_checksum != param->tmp_record_checksum)
2044 2045
  {
    _ma_check_print_error(param,
unknown's avatar
unknown committed
2046
                          "Key pointers and record positions doesn't match");
2047 2048
    error=1;
  }
2049
  else if (param->glob_crc != share->state.state.checksum &&
2050
	   (share->options &
2051 2052 2053
	    (HA_OPTION_CHECKSUM | HA_OPTION_COMPRESS_RECORD)))
  {
    _ma_check_print_warning(param,
unknown's avatar
unknown committed
2054
                            "Record checksum is not the same as checksum stored in the index file");
2055 2056 2057 2058
    error=1;
  }
  else if (!extend)
  {
unknown's avatar
unknown committed
2059
    uint key;
2060
    for (key=0 ; key < share->base.keys;  key++)
2061
    {
unknown's avatar
unknown committed
2062
      if (param->tmp_key_crc[key] != param->key_crc[key] &&
2063 2064
          !(share->keyinfo[key].flag &
            (HA_FULLTEXT | HA_SPATIAL | HA_RTREE_INDEX)))
2065 2066
      {
	_ma_check_print_error(param,"Checksum for key: %2d doesn't match checksum for records",
unknown's avatar
unknown committed
2067
                              key+1);
2068 2069 2070 2071 2072
	error=1;
      }
    }
  }

2073
  if (param->del_length != share->state.state.empty)
2074 2075
  {
    _ma_check_print_warning(param,
unknown's avatar
unknown committed
2076 2077
                            "Found %s deleted space.   Should be %s",
                            llstr(param->del_length,llbuff2),
2078
                            llstr(share->state.state.empty,llbuff));
2079
  }
unknown's avatar
unknown committed
2080 2081
  /* Skip following checks for BLOCK RECORD as they don't make any sence */
  if (share->data_file_type != BLOCK_RECORD)
2082
  {
unknown's avatar
unknown committed
2083
    if (param->used + param->empty + param->del_length !=
2084
        share->state.state.data_file_length)
unknown's avatar
unknown committed
2085 2086 2087 2088 2089 2090 2091 2092 2093 2094
    {
      _ma_check_print_warning(param,
                              "Found %s record data and %s unused data and %s deleted data",
                              llstr(param->used, llbuff),
                              llstr(param->empty,llbuff2),
                              llstr(param->del_length,llbuff3));
      _ma_check_print_warning(param,
                              "Total %s   Should be: %s",
                              llstr((param->used+param->empty +
                                     param->del_length), llbuff),
2095
                              llstr(share->state.state.data_file_length,llbuff2));
unknown's avatar
unknown committed
2096
    }
2097
    if (param->del_blocks != share->state.state.del)
unknown's avatar
unknown committed
2098 2099 2100 2101
    {
      _ma_check_print_warning(param,
                              "Found %10s deleted blocks       Should be: %s",
                              llstr(param->del_blocks,llbuff),
2102
                              llstr(share->state.state.del,llbuff2));
unknown's avatar
unknown committed
2103 2104 2105 2106 2107 2108 2109 2110
    }
    if (param->splits != share->state.split)
    {
      _ma_check_print_warning(param,
                              "Found %10s parts                Should be: %s parts",
                              llstr(param->splits, llbuff),
                              llstr(share->state.split,llbuff2));
    }
2111 2112 2113 2114 2115
  }
  if (param->testflag & T_INFO)
  {
    if (param->warning_printed || param->error_printed)
      puts("");
unknown's avatar
unknown committed
2116
    if (param->used != 0 && ! param->error_printed)
2117
    {
unknown's avatar
unknown committed
2118 2119 2120 2121 2122
      if (param->records)
      {
        printf("Records:%18s    M.recordlength:%9lu   Packed:%14.0f%%\n",
               llstr(param->records,llbuff),
               (long)((param->used - param->link_used)/param->records),
2123 2124
               (share->base.blobs ? 0.0 :
                (ulonglong2double((ulonglong) share->base.reclength *
unknown's avatar
unknown committed
2125 2126
                                  param->records)-
                 my_off_t2double(param->used))/
2127
                ulonglong2double((ulonglong) share->base.reclength *
unknown's avatar
unknown committed
2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139
                                 param->records)*100.0));
        printf("Recordspace used:%9.0f%%   Empty space:%12d%%  Blocks/Record: %6.2f\n",
               (ulonglong2double(param->used - param->link_used)/
                ulonglong2double(param->used-param->link_used+param->empty)*100.0),
               (!param->records ? 100 :
                (int) (ulonglong2double(param->del_length+param->empty)/
                       my_off_t2double(param->used)*100.0)),
               ulonglong2double(param->splits - param->del_blocks) /
               param->records);
      }
      else
        printf("Records:%18s\n", "0");
2140 2141
    }
    printf("Record blocks:%12s    Delete blocks:%10s\n",
unknown's avatar
unknown committed
2142
           llstr(param->splits - param->del_blocks, llbuff),
unknown's avatar
unknown committed
2143
           llstr(param->del_blocks, llbuff2));
2144
    printf("Record data:  %12s    Deleted data: %10s\n",
unknown's avatar
unknown committed
2145
           llstr(param->used - param->link_used,llbuff),
unknown's avatar
unknown committed
2146
           llstr(param->del_length, llbuff2));
unknown's avatar
unknown committed
2147 2148 2149 2150
    printf("Empty space:  %12s    Linkdata:     %10s\n",
           llstr(param->empty, llbuff),llstr(param->link_used, llbuff2));
    if (param->lost)
      printf("Lost space:   %12s", llstr(param->lost, llbuff));
unknown's avatar
unknown committed
2151 2152 2153 2154 2155
    if (param->max_found_trid)
    {
      printf("Max trans. id: %11s\n",
             llstr(param->max_found_trid, llbuff));
    }
2156
  }
unknown's avatar
unknown committed
2157
  my_free((uchar*) record,MYF(0));
2158
  DBUG_RETURN (error);
unknown's avatar
unknown committed
2159

unknown's avatar
unknown committed
2160
err:
unknown's avatar
unknown committed
2161
  my_free((uchar*) record,MYF(0));
2162 2163 2164 2165 2166
  param->testflag|=T_RETRY_WITHOUT_QUICK;
  DBUG_RETURN(1);
} /* maria_chk_data_link */


unknown's avatar
unknown committed
2167 2168 2169 2170 2171 2172 2173 2174 2175
/**
  Prepares a table for a repair or index sort: flushes pages and durably
  records in the table that it is undergoing the operation (if that operation
  crashes, this information will serve both Recovery and the user).

  If we start overwriting the index file and then crash, old REDOs would
  be tried and fail. To prevent that, we bump skip_redo_lsn, and thus we have
  to flush and sync pages so that old REDOs can be skipped.
  If this is not a bulk insert, which Recovery can handle gracefully (by
  truncating files, see UNDO_BULK_INSERT), we also mark the table
  crashed-on-repair, so that the user knows it has to be repaired again.
  For a bulk insert we must not mark it crashed-on-repair, because if we did,
  the UNDO phase would skip the table (UNDO_BULK_INSERT would not be applied),
  and maria_chk would not improve that.
  If this is an OPTIMIZE which merely sorts the index, we need to do the same
  too: old REDOs should not apply to the new index file.
  Only the flush is needed when in maria_chk, which is not crash-safe.

  @param  info             table
  @param  param            repair parameters
  @param  discard_index    if index pages can be thrown away

  @return Operation status
    @retval FALSE  ok
    @retval TRUE   one of the flush/write/sync steps failed
*/

static my_bool protect_against_repair_crash(MARIA_HA *info,
                                            const HA_CHECK *param,
                                            my_bool discard_index)
{
  MARIA_SHARE *share= info->s;

  /*
    There are other than recovery-related reasons to do the writes below:
    - the physical size of the data file is sometimes used during repair: we
    need to flush to have it exact
    - we flush the state because maria_open(HA_OPEN_COPY) will want to read
    it from disk.
  */
  if (_ma_flush_table_files(info, MARIA_FLUSH_DATA | MARIA_FLUSH_INDEX,
                            FLUSH_FORCE_WRITE,
                            discard_index ? FLUSH_IGNORE_CHANGED :
                            FLUSH_FORCE_WRITE))
    return TRUE;
  if (share->changed && _ma_state_info_write(share, 1|2|4))
    return TRUE;

  /* The rest is not needed in maria_chk */
  if (!maria_multi_threaded || !share->base.born_transactional)
    return FALSE;

  if (!(param->testflag & T_NO_CREATE_RENAME_LSN))
  {
    /* this can be true only for a transactional table */
    maria_mark_crashed_on_repair(info);
    if (_ma_state_info_write(share, 1|4))
      return TRUE;
  }

  /* The state LSNs are only updated when the transaction log is usable */
  if (translog_status == TRANSLOG_OK &&
      _ma_update_state_lsns(share, translog_get_horizon(),
                            share->state.create_trid, FALSE, FALSE))
    return TRUE;

  if (_ma_sync_table_files(info))
    return TRUE;

  return FALSE;
}


2230 2231 2232 2233
/**
   @brief Initialize variables for repair
*/

2234 2235 2236 2237
static int initialize_variables_for_repair(HA_CHECK *param,
                                           MARIA_SORT_INFO *sort_info,
                                           MARIA_SORT_PARAM *sort_param,
                                           MARIA_HA *info,
2238
                                           my_bool rep_quick)
2239
{
2240 2241
  MARIA_SHARE *share= info->s;

2242 2243 2244 2245
  /* Repair code relies on share->state.state so we have to update it here */
  if (share->lock.update_status)
    (*share->lock.update_status)(info);

2246 2247
  bzero((char*) sort_info,  sizeof(*sort_info));
  bzero((char*) sort_param, sizeof(*sort_param));
2248 2249

  param->testflag|= T_REP;                     /* for easy checking */
2250
  if (share->options & (HA_OPTION_CHECKSUM | HA_OPTION_COMPRESS_RECORD))
2251 2252
    param->testflag|= T_CALC_CHECKSUM;
  param->glob_crc= 0;
2253 2254 2255 2256
  if (rep_quick)
    param->testflag|= T_QUICK;
  else
    param->testflag&= ~T_QUICK;
2257
  param->org_key_map= share->state.key_map;
2258 2259

  sort_param->sort_info= sort_info;
2260
  sort_param->fix_datafile= ! rep_quick;
2261 2262 2263 2264
  sort_param->calc_checksum= test(param->testflag & T_CALC_CHECKSUM);
  sort_info->info= sort_info->new_info= info;
  sort_info->param= param;
  set_data_file_type(sort_info, info->s);
2265
  sort_info->org_data_file_type= share->data_file_type;
2266 2267 2268 2269

  bzero(&info->rec_cache, sizeof(info->rec_cache));
  info->rec_cache.file= info->dfile.file;
  info->update= (short) (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED);
2270

unknown's avatar
unknown committed
2271 2272 2273
  if (protect_against_repair_crash(info, param, !test(param->testflag &
                                                      T_CREATE_MISSING_KEYS)))
    return 1;
2274

unknown's avatar
unknown committed
2275
  /* calculate max_records */
2276 2277 2278
  sort_info->filelength= my_seek(info->dfile.file, 0L, MY_SEEK_END, MYF(0));
  if ((param->testflag & T_CREATE_MISSING_KEYS) ||
      sort_info->org_data_file_type == COMPRESSED_RECORD)
2279
    sort_info->max_records= share->state.state.records;
2280 2281 2282 2283 2284 2285 2286
  else
  {
    ulong rec_length;
    rec_length= max(share->base.min_pack_length,
                    share->base.min_block_length);
    sort_info->max_records= (ha_rows) (sort_info->filelength / rec_length);
  }
unknown's avatar
unknown committed
2287 2288 2289 2290 2291 2292 2293 2294 2295 2296

  /* Set up transaction handler so that we can see all rows */
  if (!ma_control_file_inited())
    param->max_trid= 0;                 /* Give warning for first trid found */
  else
    param->max_trid= max_trid_in_system();

  maria_ignore_trids(info);
  /* Don't write transid's during repair */
  maria_versioning(info, 0);
2297
  return 0;
2298 2299 2300
}


2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408
/**
  @brief Drop all indexes

  @param[in]    param           check parameters
  @param[in]    info            MARIA_HA handle
  @param[in]    force           if to force drop all indexes

  @return       status
    @retval     0               OK
    @retval     != 0            Error

  @note
    Once allocated, index blocks remain part of the key file forever.
    When indexes are disabled, no block is freed. When enabling indexes,
    no block is freed either. The new indexes are created from new
    blocks. (Bug #4692)

    Before recreating formerly disabled indexes, the unused blocks
    must be freed. There are two options to do this:
    - Follow the tree of disabled indexes, add all blocks to the
      deleted blocks chain. Would require a lot of random I/O.
    - Drop all blocks by clearing all index root pointers and all
      delete chain pointers and resetting key_file_length to the end
      of the index file header. This requires to recreate all indexes,
      even those that may still be intact.
    The second method is probably faster in most cases.

    When disabling indexes, MySQL disables either all indexes or all
    non-unique indexes. When MySQL [re-]enables disabled indexes
    (T_CREATE_MISSING_KEYS), then we either have "lost" blocks in the
    index file, or there are no non-unique indexes. In the latter case,
    maria_repair*() would not be called as there would be no disabled
    indexes.

    If there would be more unique indexes than disabled (non-unique)
    indexes, we could do the first method. But this is not implemented
    yet. By now we drop and recreate all indexes when repair is called.

    However, there is an exception. Sometimes MySQL disables non-unique
    indexes when the table is empty (e.g. when copying a table in
    mysql_alter_table()). When enabling the non-unique indexes, they
    are still empty. So there is no index block that can be lost. This
    optimization is implemented in this function.

    Note that in normal repair (T_CREATE_MISSING_KEYS not set) we
    recreate all enabled indexes unconditionally. We do not change the
    key_map. Otherwise we invert the key map temporarily (outside of
    this function) and recreate the then "seemingly" enabled indexes.
    When we cannot use the optimization, and drop all indexes, we
    pretend that all indexes were disabled. By the inversion, we will
    then recreate all indexes.
*/

static int maria_drop_all_indexes(HA_CHECK *param, MARIA_HA *info,
                                  my_bool force)
{
  MARIA_SHARE *share= info->s;
  MARIA_STATE_INFO *state= &share->state;
  uint keynr;
  DBUG_ENTER("maria_drop_all_indexes");

  /*
    When only the disabled indexes are to be recreated _and_ none of them
    owns a key block, the existing indexes can be left alone. As soon as
    one disabled index has a block assigned, everything must be dropped
    and recreated, or its blocks would be lost.
  */
  if (!force && (param->testflag & T_CREATE_MISSING_KEYS))
  {
    my_bool must_recreate_all= FALSE;

    DBUG_PRINT("repair", ("creating missing indexes"));
    for (keynr= 0;
         !must_recreate_all && keynr < share->base.keys;
         keynr++)
    {
      DBUG_PRINT("repair", ("index #: %u  key_root: 0x%lx  active: %d",
                            keynr, (long) state->key_root[keynr],
                            maria_is_key_active(state->key_map, keynr)));
      if (state->key_root[keynr] != HA_OFFSET_ERROR &&
          !maria_is_key_active(state->key_map, keynr))
      {
        /*
          A disabled index that owns at least one key block: recreating
          only this index would lose the block(s).
        */
        DBUG_PRINT("repair", ("nonempty and disabled: recreate all"));
        must_recreate_all= TRUE;
      }
    }
    if (!must_recreate_all)
      DBUG_RETURN(0);                           /* Nothing to drop */

    /*
      Declare every index disabled. With T_CREATE_MISSING_KEYS set,
      maria_repair*() recreates and enables all disabled indexes.
    */
    maria_clear_all_keys_active(state->key_map);
    DBUG_PRINT("repair", ("declared all indexes disabled"));
  }

  /* Forget the root block of every index */
  for (keynr= 0; keynr < share->base.keys; keynr++)
    state->key_root[keynr]= HA_OFFSET_ERROR;

  /* Forget the chain of deleted key blocks */
  state->key_del= HA_OFFSET_ERROR;

  /* The index file now logically ends right after its header */
  state->state.key_file_length= share->base.keystart;

  DBUG_RETURN(0);
}


2416 2417 2418 2419
/*
  Recover old table by reading each record and writing all keys

  NOTES
2420 2421 2422 2423 2424
    Save new datafile-name in temp_filename.
    We overwrite the index file as we go (writekeys() for example), so if we
    crash during this the table is unusable and user (or Recovery in the
    future) must repeat the REPAIR/OPTIMIZE operation. We could use a
    temporary index file in the future (drawback: more disk space).
2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436

  IMPLEMENTATION (for hard repair with block format)
   - Create new, unrelated MARIA_HA of the table
   - Create new datafile and associate it with new handler
   - Reset all statistic information in new handler
   - Copy all data to new handler with normal write operations
   - Move state of new handler to old handler
   - Close new handler
   - Close data file in old handler
   - Rename old data file to new data file.
   - Reopen data file in old handler
*/
2437 2438

int maria_repair(HA_CHECK *param, register MARIA_HA *info,
2439
                 char *name, my_bool rep_quick)
2440
{
2441
  int error, got_error;
2442 2443 2444
  ha_rows start_records,new_header_length;
  my_off_t del;
  File new_file;
2445
  MARIA_SHARE *share= info->s;
2446 2447 2448
  char llbuff[22],llbuff2[22];
  MARIA_SORT_INFO sort_info;
  MARIA_SORT_PARAM sort_param;
unknown's avatar
unknown committed
2449 2450
  my_bool block_record, scan_inited= 0,
    reenable_logging= share->now_transactional;
2451
  enum data_file_type org_data_file_type= share->data_file_type;
unknown's avatar
unknown committed
2452 2453
  myf sync_dir= ((share->now_transactional && !share->temporary) ?
                 MY_SYNC_DIR : 0);
2454 2455
  DBUG_ENTER("maria_repair");

2456
  got_error= 1;
2457
  new_file= -1;
2458
  start_records= share->state.state.records;
2459 2460 2461
  if (!(param->testflag & T_SILENT))
  {
    printf("- recovering (with keycache) MARIA-table '%s'\n",name);
2462
    printf("Data records: %s\n", llstr(start_records, llbuff));
2463 2464
  }

2465 2466
  if (initialize_variables_for_repair(param, &sort_info, &sort_param, info,
                                      rep_quick))
unknown's avatar
unknown committed
2467
    goto err;
unknown's avatar
unknown committed
2468

unknown's avatar
unknown committed
2469
  if (reenable_logging)
unknown's avatar
unknown committed
2470
    _ma_tmp_disable_logging_for_table(info, 0);
unknown's avatar
unknown committed
2471

2472
  sort_param.current_filepos= sort_param.filepos= new_header_length=
2473
    ((param->testflag & T_UNPACK) ? 0L : share->pack.header_length);
2474

2475 2476 2477
  if (!rep_quick)
  {
    /* Get real path for data file */
unknown's avatar
unknown committed
2478
    if ((new_file= my_create(fn_format(param->temp_filename,
2479
                                       share->data_file_name.str, "",
unknown's avatar
unknown committed
2480 2481 2482
                                       DATA_TMP_EXT, 2+4),
                             0,param->tmpfile_createflag,
                             MYF(0))) < 0)
2483 2484 2485 2486 2487
    {
      _ma_check_print_error(param,"Can't create new tempfile: '%s'",
			   param->temp_filename);
      goto err;
    }
2488
    if (new_header_length &&
unknown's avatar
unknown committed
2489 2490
        maria_filecopy(param, new_file, info->dfile.file, 0L,
                       new_header_length, "datafile-header"))
2491
      goto err;
2492
    share->state.dellink= HA_OFFSET_ERROR;
2493
    info->rec_cache.file= new_file;             /* For sort_delete_record */
2494
    if (share->data_file_type == BLOCK_RECORD ||
2495
        (param->testflag & T_UNPACK))
2496
    {
2497
      if (create_new_data_handle(&sort_param, new_file))
2498
        goto err;
2499
      sort_info.new_info->rec_cache.file= new_file;
2500 2501 2502
    }
  }

2503 2504
  block_record= sort_info.new_info->s->data_file_type == BLOCK_RECORD;

2505 2506 2507 2508 2509 2510 2511
  if (org_data_file_type != BLOCK_RECORD)
  {
    /* We need a read buffer to read rows in big blocks */
    if (init_io_cache(&param->read_cache, info->dfile.file,
                      (uint) param->read_buffer_length,
                      READ_CACHE, share->pack.header_length, 1, MYF(MY_WME)))
      goto err;
2512
  }
2513 2514 2515 2516
  if (sort_info.new_info->s->data_file_type != BLOCK_RECORD)
  {
    /* When writing to not block records, we need a write buffer */
    if (!rep_quick)
2517 2518
    {
      if (init_io_cache(&sort_info.new_info->rec_cache, new_file,
2519 2520
                        (uint) param->write_buffer_length,
                        WRITE_CACHE, new_header_length, 1,
2521
                        MYF(MY_WME | MY_WAIT_IF_FULL) & param->myf_rw))
2522
        goto err;
2523 2524
      sort_info.new_info->opt_flag|=WRITE_CACHE_USED;
    }
2525
  }
2526
  else if (block_record)
2527 2528 2529 2530 2531 2532
  {
    scan_inited= 1;
    if (maria_scan_init(sort_info.info))
      goto err;
  }

2533 2534 2535
  if (!(sort_param.record=
        (uchar *) my_malloc((uint)
                            share->base.default_rec_buff_size, MYF(0))) ||
2536
      _ma_alloc_buffer(&sort_param.rec_buff, &sort_param.rec_buff_size,
2537
                       share->base.default_rec_buff_size))
2538 2539 2540
  {
    _ma_check_print_error(param, "Not enough memory for extra record");
    goto err;
2541
  }
2542

2543 2544
  sort_param.read_cache=param->read_cache;
  sort_param.pos=sort_param.max_pos=share->pack.header_length;
2545
  param->read_cache.end_of_file= sort_info.filelength;
2546 2547 2548
  sort_param.master=1;
  sort_info.max_records= ~(ha_rows) 0;

2549 2550 2551
  del= share->state.state.del;
  share->state.state.records= share->state.state.del= share->state.split= 0;
  share->state.state.empty= 0;
2552 2553 2554

  if (param->testflag & T_CREATE_MISSING_KEYS)
    maria_set_all_keys_active(share->state.key_map, share->base.keys);
2555
  maria_drop_all_indexes(param, info, TRUE);
2556 2557 2558 2559 2560 2561

  maria_lock_memory(param);			/* Everything is alloced */

  /* Re-create all keys, which are set in key_map. */
  while (!(error=sort_get_next_record(&sort_param)))
  {
2562 2563 2564
    if (block_record && _ma_sort_write_record(&sort_param))
      goto err;

2565
    if (writekeys(&sort_param))
2566 2567 2568
    {
      if (my_errno != HA_ERR_FOUND_DUPP_KEY)
	goto err;
2569 2570
      DBUG_DUMP("record", (uchar*) sort_param.record,
                share->base.default_rec_buff_size);
2571 2572 2573 2574 2575
      _ma_check_print_warning(param,
                              "Duplicate key %2d for record at %10s against new record at %10s",
                              info->errkey+1,
                              llstr(sort_param.current_filepos, llbuff),
                              llstr(info->dup_key_pos,llbuff2));
2576 2577
      if (param->testflag & T_VERBOSE)
      {
2578 2579 2580 2581 2582 2583
        MARIA_KEY tmp_key;
        MARIA_KEYDEF *keyinfo= share->keyinfo + info->errkey;
	(*keyinfo->make_key)(info, &tmp_key, (uint) info->errkey,
                             info->lastkey_buff,
                             sort_param.record, 0L, 0);
        _ma_print_key(stdout, &tmp_key);
2584 2585 2586 2587 2588 2589 2590 2591
      }
      sort_info.dupp++;
      if ((param->testflag & (T_FORCE_UNIQUENESS|T_QUICK)) == T_QUICK)
      {
        param->testflag|=T_RETRY_WITHOUT_QUICK;
	param->error_printed=1;
	goto err;
      }
unknown's avatar
unknown committed
2592 2593 2594
      /* purecov: begin tested */
      if (block_record)
      {
2595
        sort_info.new_info->s->state.state.records--;
unknown's avatar
unknown committed
2596 2597 2598 2599 2600 2601 2602
        if ((*sort_info.new_info->s->write_record_abort)(sort_info.new_info))
        {
          _ma_check_print_error(param,"Couldn't delete duplicate row");
          goto err;
        }
      }
      /* purecov: end */
2603
      continue;
2604
    }
2605 2606 2607 2608 2609 2610 2611
    if (!block_record)
    {
      if (_ma_sort_write_record(&sort_param))
        goto err;
      /* Filepos is pointer to where next row will be stored */
      sort_param.current_filepos= sort_param.filepos;
    }
2612
  }
2613
  if (error > 0 || maria_write_data_suffix(&sort_info, !rep_quick) ||
2614 2615
      flush_io_cache(&sort_info.new_info->rec_cache) ||
      param->read_cache.error < 0)
2616 2617 2618 2619 2620 2621
    goto err;

  if (param->testflag & T_WRITE_LOOP)
  {
    VOID(fputs("          \r",stdout)); VOID(fflush(stdout));
  }
2622
  if (my_chsize(share->kfile.file, share->state.state.key_file_length, 0, MYF(0)))
2623 2624 2625 2626 2627 2628 2629
  {
    _ma_check_print_warning(param,
			   "Can't change size of indexfile, error: %d",
			   my_errno);
    goto err;
  }

2630
  if (rep_quick && del+sort_info.dupp != share->state.state.del)
2631 2632 2633 2634 2635 2636 2637
  {
    _ma_check_print_error(param,"Couldn't fix table with quick recovery: Found wrong number of deleted records");
    _ma_check_print_error(param,"Run recovery again without -q");
    param->retry_repair=1;
    param->testflag|=T_RETRY_WITHOUT_QUICK;
    goto err;
  }
2638

2639 2640 2641
  if (param->testflag & T_SAFE_REPAIR)
  {
    /* Don't repair if we loosed more than one row */
2642
    if (sort_info.new_info->s->state.state.records+1 < start_records)
2643
    {
2644
      share->state.state.records= start_records;
2645 2646 2647 2648
      goto err;
    }
  }

2649 2650
  VOID(end_io_cache(&sort_info.new_info->rec_cache));
  info->opt_flag&= ~WRITE_CACHE_USED;
unknown's avatar
unknown committed
2651 2652 2653 2654 2655 2656

  /*
    As we have read the data file (sort_get_next_record()) we may have
    cached, non-changed blocks of it in the page cache. We must throw them
    away as we are going to close their descriptor ('new_file'). We also want
    to flush any index block, so that it is ready for the upcoming sync.
unknown's avatar
unknown committed
2657
  */
unknown's avatar
unknown committed
2658
  if (_ma_flush_table_files_before_swap(param, info))
2659 2660
    goto err;

2661 2662
  if (!rep_quick)
  {
2663
    sort_info.new_info->s->state.state.data_file_length= sort_param.filepos;
2664 2665 2666 2667 2668 2669 2670 2671
    if (sort_info.new_info != sort_info.info)
    {
      MARIA_STATE_INFO save_state= sort_info.new_info->s->state;
      if (maria_close(sort_info.new_info))
      {
        _ma_check_print_error(param, "Got error %d on close", my_errno);
        goto err;
      }
2672
      copy_data_file_state(&share->state, &save_state);
2673
      new_file= -1;
2674
      sort_info.new_info= info;
2675
    }
2676
    share->state.version=(ulong) time((time_t*) 0);	/* Force reopen */
2677 2678 2679 2680

    /* Replace the actual file with the temporary file */
    if (new_file >= 0)
      my_close(new_file, MYF(MY_WME));
unknown's avatar
unknown committed
2681 2682
    new_file= -1;
    change_data_file_descriptor(info, -1);
2683
    if (maria_change_to_newfile(share->data_file_name.str, MARIA_NAME_DEXT,
2684 2685
                                DATA_TMP_EXT,
                                (param->testflag & T_BACKUP_DATA ?
2686 2687
                                 MYF(MY_REDEL_MAKE_BACKUP): MYF(0)) |
                                sync_dir) ||
2688 2689 2690 2691
        _ma_open_datafile(info, share, -1))
    {
      goto err;
    }
2692 2693 2694
  }
  else
  {
2695
    share->state.state.data_file_length= sort_param.max_pos;
2696 2697
  }
  if (param->testflag & T_CALC_CHECKSUM)
2698
    share->state.state.checksum= param->glob_crc;
2699 2700 2701

  if (!(param->testflag & T_SILENT))
  {
2702 2703
    if (start_records != share->state.state.records)
      printf("Data records: %s\n", llstr(share->state.state.records,llbuff));
2704
  }
unknown's avatar
unknown committed
2705 2706 2707 2708
  if (sort_info.dupp)
    _ma_check_print_warning(param,
                            "%s records have been removed",
                            llstr(sort_info.dupp,llbuff));
2709

unknown's avatar
unknown committed
2710
  got_error= 0;
2711 2712
  /* If invoked by external program that uses thr_lock */
  if (&share->state.state != info->state)
2713
    *info->state= *info->state_start= share->state.state;
2714 2715

err:
2716 2717
  if (scan_inited)
    maria_scan_end(sort_info.info);
2718
  _ma_reset_state(info);
2719

unknown's avatar
unknown committed
2720
  VOID(end_io_cache(&param->read_cache));
2721
  VOID(end_io_cache(&sort_info.new_info->rec_cache));
unknown's avatar
unknown committed
2722
  info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED);
2723
  sort_info.new_info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED);
unknown's avatar
unknown committed
2724
  /* this below could fail, shouldn't we detect error? */
2725 2726 2727 2728 2729
  if (got_error)
  {
    if (! param->error_printed)
      _ma_check_print_error(param,"%d for record at pos %s",my_errno,
		  llstr(sort_param.start_recpos,llbuff));
unknown's avatar
unknown committed
2730
    (void)_ma_flush_table_files_before_swap(param, info);
2731 2732
    if (sort_info.new_info && sort_info.new_info != sort_info.info)
    {
2733
      unuse_data_file_descriptor(sort_info.new_info);
2734 2735
      maria_close(sort_info.new_info);
    }
2736 2737 2738
    if (new_file >= 0)
    {
      VOID(my_close(new_file,MYF(0)));
unknown's avatar
unknown committed
2739
      VOID(my_delete(param->temp_filename, MYF(MY_WME)));
2740 2741 2742
    }
    maria_mark_crashed_on_repair(info);
  }
unknown's avatar
unknown committed
2743 2744
  /* If caller had disabled logging it's not up to us to re-enable it */
  if (reenable_logging)
2745
    _ma_reenable_logging_for_table(info, FALSE);
unknown's avatar
unknown committed
2746

unknown's avatar
unknown committed
2747
  my_free(sort_param.rec_buff, MYF(MY_ALLOW_ZERO_PTR));
2748 2749
  my_free(sort_param.record,MYF(MY_ALLOW_ZERO_PTR));
  my_free(sort_info.buff,MYF(MY_ALLOW_ZERO_PTR));
unknown's avatar
unknown committed
2750 2751
  if (!got_error && (param->testflag & T_UNPACK))
    restore_data_file_type(share);
2752
  share->state.changed|= (STATE_NOT_OPTIMIZED_KEYS | STATE_NOT_SORTED_PAGES |
unknown's avatar
unknown committed
2753 2754 2755
			  STATE_NOT_ANALYZED | STATE_NOT_ZEROFILLED);
  if (!rep_quick)
    share->state.changed&= ~(STATE_NOT_OPTIMIZED_ROWS | STATE_NOT_MOVABLE);
2756 2757 2758 2759
  DBUG_RETURN(got_error);
}


unknown's avatar
unknown committed
2760
/* Uppdate keyfile when doing repair */
2761

2762
static int writekeys(MARIA_SORT_PARAM *sort_param)
2763
{
2764 2765 2766
  uint i;
  MARIA_HA *info=     sort_param->sort_info->info;
  MARIA_SHARE *share= info->s;
2767 2768
  uchar *record=    sort_param->record;
  uchar *key_buff;
2769
  my_off_t filepos=   sort_param->current_filepos;
2770
  MARIA_KEY key;
2771 2772
  DBUG_ENTER("writekeys");

2773 2774
  key_buff= info->lastkey_buff+share->base.max_key_length;

2775
  for (i=0 ; i < share->base.keys ; i++)
2776
  {
2777
    if (maria_is_key_active(share->state.key_map, i))
2778
    {
2779
      if (share->keyinfo[i].flag & HA_FULLTEXT )
2780
      {
2781
        if (_ma_ft_add(info, i, key_buff, record, filepos))
2782 2783 2784 2785
	  goto err;
      }
      else
      {
2786 2787 2788 2789
	if (!(*share->keyinfo[i].make_key)(info, &key, i, key_buff, record,
                                         filepos, 0))
          goto err;
	if ((*share->keyinfo[i].ck_insert)(info, &key))
2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801
	  goto err;
      }
    }
  }
  DBUG_RETURN(0);

 err:
  if (my_errno == HA_ERR_FOUND_DUPP_KEY)
  {
    info->errkey=(int) i;			/* This key was found */
    while ( i-- > 0 )
    {
2802
      if (maria_is_key_active(share->state.key_map, i))
2803
      {
2804
	if (share->keyinfo[i].flag & HA_FULLTEXT)
2805
        {
2806
          if (_ma_ft_del(info,i,key_buff,record,filepos))
2807 2808 2809 2810
	    break;
        }
        else
	{
2811 2812 2813
	  (*share->keyinfo[i].make_key)(info, &key, i, key_buff, record,
                                        filepos, 0);
	  if (_ma_ck_delete(info, &key))
2814 2815 2816 2817 2818 2819
	    break;
	}
      }
    }
  }
  /* Remove checksum that was added to glob_crc in sort_get_next_record */
2820
  if (sort_param->calc_checksum)
unknown's avatar
unknown committed
2821
    sort_param->sort_info->param->glob_crc-= info->cur_row.checksum;
2822 2823 2824 2825 2826 2827 2828
  DBUG_PRINT("error",("errno: %d",my_errno));
  DBUG_RETURN(-1);
} /* writekeys */


	/* Change all key pointers that point to a record */

unknown's avatar
unknown committed
2829
/*
  For every active index except prot_key, make the key of 'record' refer to
  row position 'newpos' instead of 'oldpos'.

  Returns 0 on success, -1 as soon as any key operation fails.
*/
int maria_movepoint(register MARIA_HA *info, uchar *record,
                    MARIA_RECORD_POS oldpos, MARIA_RECORD_POS newpos,
                    uint prot_key)
{
  MARIA_SHARE *share= info->s;
  uchar *key_buff= info->lastkey_buff + share->base.max_key_length;
  uint keynr;
  DBUG_ENTER("maria_movepoint");

  for (keynr= 0; keynr < share->base.keys; keynr++)
  {
    MARIA_KEYDEF *keyinfo= share->keyinfo + keynr;
    MARIA_KEY key;

    if (keynr == prot_key ||
        !maria_is_key_active(share->state.key_map, keynr))
      continue;

    (*keyinfo->make_key)(info, &key, keynr, key_buff, record, oldpos, 0);

    if (!(key.keyinfo->flag & HA_NOSAME))
    {
      /* Change old key to new: delete it and write one for newpos */
      if (_ma_ck_delete(info, &key))
        DBUG_RETURN(-1);
      (*keyinfo->make_key)(info, &key, keynr, key_buff, record, newpos, 0);
      if (_ma_ck_write(info, &key))
        DBUG_RETURN(-1);
      continue;
    }

    /* HA_NOSAME key: change the pointer directly in the key page */
    {
      uint nod_flag;

      if (_ma_search(info, &key, (uint32) (SEARCH_SAME | SEARCH_SAVE_BUFF),
                     share->state.key_root[keynr]))
        DBUG_RETURN(-1);
      nod_flag= _ma_test_if_nod(share, info->buff);
      _ma_dpointer(share,
                   info->int_keypos - nod_flag - share->rec_reflength,
                   newpos);
      if (_ma_write_keypage(info, keyinfo, info->last_keypage,
                            PAGECACHE_LOCK_LEFT_UNLOCKED, DFLT_INIT_HITS,
                            info->buff))
        DBUG_RETURN(-1);
    }
  }
  DBUG_RETURN(0);
} /* maria_movepoint */


	/* Tell system that we want all memory for our cache */

/*
  Only does something when compiled with SUN_OS: if
  param->opt_maria_lock_memory is set, tries to lock the current memory of
  the process and warns when that fails for an effective uid of 0.
  Without SUN_OS this is a no-op and 'param' is unused.
*/
void maria_lock_memory(HA_CHECK *param __attribute__((unused)))
{
#ifdef SUN_OS				/* Key-cacheing thrases on sun 4.1 */
  if (!param->opt_maria_lock_memory)
    return;

  /* mlockall() could be replaced by plock(DATLOCK) */
  if (mlockall(MCL_CURRENT) != 0 && geteuid() == 0)
    _ma_check_print_warning(param,
                            "Failed to lock memory. errno %d",my_errno);
#endif
} /* maria_lock_memory */


unknown's avatar
unknown committed
2893
/**
unknown's avatar
unknown committed
2894
   Flush all changed blocks to disk.
unknown's avatar
unknown committed
2895 2896

   We release blocks as it's unlikely that they would all be needed soon.
unknown's avatar
unknown committed
2897 2898
   This function needs to be called before swapping data or index files or
   syncing them.
2899

unknown's avatar
unknown committed
2900 2901 2902 2903
   @param  param           description of the repair operation
   @param  info            table
*/

unknown's avatar
unknown committed
2904 2905
static my_bool _ma_flush_table_files_before_swap(HA_CHECK *param,
                                                 MARIA_HA *info)
2906
{
unknown's avatar
unknown committed
2907
  DBUG_ENTER("_ma_flush_table_files_before_swap");
unknown's avatar
unknown committed
2908
  if (_ma_flush_table_files(info, MARIA_FLUSH_DATA | MARIA_FLUSH_INDEX,
unknown's avatar
unknown committed
2909
                            FLUSH_RELEASE, FLUSH_RELEASE))
2910
  {
unknown's avatar
unknown committed
2911 2912
    _ma_check_print_error(param, "%d when trying to write buffers", my_errno);
    DBUG_RETURN(TRUE);
2913
  }
unknown's avatar
unknown committed
2914 2915
  DBUG_RETURN(FALSE);
}
2916 2917 2918 2919


	/* Sort index for more efficient reads */

unknown's avatar
unknown committed
2920
int maria_sort_index(HA_CHECK *param, register MARIA_HA *info, char *name)
2921 2922 2923 2924 2925 2926 2927
{
  reg2 uint key;
  reg1 MARIA_KEYDEF *keyinfo;
  File new_file;
  my_off_t index_pos[HA_MAX_POSSIBLE_KEY];
  uint r_locks,w_locks;
  int old_lock;
2928
  MARIA_SHARE *share= info->s;
2929
  MARIA_STATE_INFO old_state;
2930 2931
  myf sync_dir= ((share->now_transactional && !share->temporary) ?
                 MY_SYNC_DIR : 0);
2932 2933
  DBUG_ENTER("maria_sort_index");

2934 2935 2936 2937 2938 2939
  /* cannot sort index files with R-tree indexes */
  for (key= 0,keyinfo= &share->keyinfo[0]; key < share->base.keys ;
       key++,keyinfo++)
    if (keyinfo->key_alg == HA_KEY_ALG_RTREE)
      DBUG_RETURN(0);

2940 2941 2942
  if (!(param->testflag & T_SILENT))
    printf("- Sorting index for MARIA-table '%s'\n",name);

unknown's avatar
unknown committed
2943 2944 2945
  if (protect_against_repair_crash(info, param, FALSE))
    DBUG_RETURN(1);

2946 2947 2948 2949 2950 2951 2952 2953 2954 2955
  /* Get real path for index file */
  fn_format(param->temp_filename,name,"", MARIA_NAME_IEXT,2+4+32);
  if ((new_file=my_create(fn_format(param->temp_filename,param->temp_filename,
				    "", INDEX_TMP_EXT,2+4),
			  0,param->tmpfile_createflag,MYF(0))) <= 0)
  {
    _ma_check_print_error(param,"Can't create new tempfile: '%s'",
			 param->temp_filename);
    DBUG_RETURN(-1);
  }
unknown's avatar
unknown committed
2956
  if (maria_filecopy(param, new_file, share->kfile.file, 0L,
unknown's avatar
unknown committed
2957
                     (ulong) share->base.keystart, "headerblock"))
2958 2959 2960 2961 2962 2963
    goto err;

  param->new_file_pos=share->base.keystart;
  for (key= 0,keyinfo= &share->keyinfo[0]; key < share->base.keys ;
       key++,keyinfo++)
  {
2964
    if (! maria_is_key_active(share->state.key_map, key))
2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977
      continue;

    if (share->state.key_root[key] != HA_OFFSET_ERROR)
    {
      index_pos[key]=param->new_file_pos;	/* Write first block here */
      if (sort_one_index(param,info,keyinfo,share->state.key_root[key],
			 new_file))
	goto err;
    }
    else
      index_pos[key]= HA_OFFSET_ERROR;		/* No blocks */
  }

unknown's avatar
unknown committed
2978
  /* Flush key cache for this file if we are calling this outside maria_chk */
unknown's avatar
unknown committed
2979 2980
  flush_pagecache_blocks(share->pagecache, &share->kfile,
                         FLUSH_IGNORE_CHANGED);
2981 2982 2983 2984 2985 2986 2987 2988 2989 2990

  share->state.version=(ulong) time((time_t*) 0);
  old_state= share->state;			/* save state if not stored */
  r_locks=   share->r_locks;
  w_locks=   share->w_locks;
  old_lock=  info->lock_type;

	/* Put same locks as old file */
  share->r_locks= share->w_locks= share->tot_locks= 0;
  (void) _ma_writeinfo(info,WRITEINFO_UPDATE_KEYFILE);
unknown's avatar
unknown committed
2991
  pthread_mutex_lock(&share->intern_lock);
unknown's avatar
unknown committed
2992 2993
  VOID(my_close(share->kfile.file, MYF(MY_WME)));
  share->kfile.file = -1;
unknown's avatar
unknown committed
2994
  pthread_mutex_unlock(&share->intern_lock);
2995
  VOID(my_close(new_file,MYF(MY_WME)));
2996
  if (maria_change_to_newfile(share->index_file_name.str, MARIA_NAME_IEXT,
2997
                              INDEX_TMP_EXT, sync_dir) ||
2998 2999 3000 3001 3002 3003 3004 3005 3006 3007
      _ma_open_keyfile(share))
    goto err2;
  info->lock_type= F_UNLCK;			/* Force maria_readinfo to lock */
  _ma_readinfo(info,F_WRLCK,0);			/* Will lock the table */
  info->lock_type=  old_lock;
  share->r_locks=   r_locks;
  share->w_locks=   w_locks;
  share->tot_locks= r_locks+w_locks;
  share->state=     old_state;			/* Restore old state */

3008
  share->state.state.key_file_length=param->new_file_pos;
3009
  info->update= (short) (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED);
3010 3011 3012
  for (key=0 ; key < share->base.keys ; key++)
    share->state.key_root[key]=index_pos[key];
  share->state.key_del=  HA_OFFSET_ERROR;
3013

3014
  share->state.changed&= ~STATE_NOT_SORTED_PAGES;
unknown's avatar
unknown committed
3015 3016 3017 3018 3019 3020 3021 3022
  DBUG_EXECUTE_IF("maria_flush_whole_log",
                  {
                    DBUG_PRINT("maria_flush_whole_log", ("now"));
                    translog_flush(translog_get_horizon());
                  });
  DBUG_EXECUTE_IF("maria_crash_sort_index",
                  {
                    DBUG_PRINT("maria_crash_sort_index", ("now"));
3023
                    DBUG_ABORT();
unknown's avatar
unknown committed
3024
                  });
3025 3026 3027 3028 3029 3030 3031 3032 3033 3034
  DBUG_RETURN(0);

err:
  VOID(my_close(new_file,MYF(MY_WME)));
err2:
  VOID(my_delete(param->temp_filename,MYF(MY_WME)));
  DBUG_RETURN(-1);
} /* maria_sort_index */


unknown's avatar
unknown committed
3035 3036 3037 3038 3039 3040 3041 3042
/**
  @brief put CRC on the page

  @param buff            reference on the page buffer.
  @param pos             position of the page in the file.
  @param length          length of the page
*/

unknown's avatar
unknown committed
3043
static void put_crc(uchar *buff, my_off_t pos, MARIA_SHARE *share)
unknown's avatar
unknown committed
3044
{
3045 3046
  maria_page_crc_set_index(buff, (pgcache_page_no_t) (pos / share->block_size),
                           (uchar*) share);
unknown's avatar
unknown committed
3047 3048 3049
}


3050
/* Sort index blocks recursive using one index */
3051

unknown's avatar
unknown committed
3052 3053
static int sort_one_index(HA_CHECK *param, MARIA_HA *info,
                          MARIA_KEYDEF *keyinfo,
3054 3055
			  my_off_t pagepos, File new_file)
{
3056
  uint length,nod_flag,used_length;
unknown's avatar
unknown committed
3057
  uchar *buff,*keypos,*endpos;
3058
  my_off_t new_page_pos,next_page;
3059
  MARIA_SHARE *share= info->s;
3060
  MARIA_KEY key;
3061 3062
  DBUG_ENTER("sort_one_index");

3063 3064
  /* cannot walk over R-tree indices */
  DBUG_ASSERT(keyinfo->key_alg != HA_KEY_ALG_RTREE);
3065 3066
  new_page_pos=param->new_file_pos;
  param->new_file_pos+=keyinfo->block_length;
3067 3068
  key.keyinfo= keyinfo;
  key.data= info->lastkey_buff;
3069

unknown's avatar
unknown committed
3070
  if (!(buff= (uchar*) my_alloca((uint) keyinfo->block_length)))
3071 3072 3073 3074
  {
    _ma_check_print_error(param,"Not enough memory for key block");
    DBUG_RETURN(-1);
  }
3075 3076
  if (!_ma_fetch_keypage(info, keyinfo, pagepos,PAGECACHE_LOCK_LEFT_UNLOCKED,
                         DFLT_INIT_HITS, buff, 0, 0))
3077
  {
unknown's avatar
unknown committed
3078
    report_keypage_fault(param, info, pagepos);
3079 3080
    goto err;
  }
3081
  if ((nod_flag=_ma_test_if_nod(share, buff)) || keyinfo->flag & HA_FULLTEXT)
3082
  {
3083
    uint page_flag= _ma_get_keypage_flag(share, buff);
3084 3085
    used_length= _ma_get_page_used(share, buff);
    keypos=buff + share->keypage_header + nod_flag;
3086
    endpos=buff + used_length;
3087

3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100
    for ( ;; )
    {
      if (nod_flag)
      {
	next_page= _ma_kpos(nod_flag,keypos);
        /* Save new pos */
	_ma_kpointer(info,keypos-nod_flag,param->new_file_pos);
	if (sort_one_index(param,info,keyinfo,next_page,new_file))
	{
	  DBUG_PRINT("error",
		     ("From page: %ld, keyoffset: %lu  used_length: %d",
		      (ulong) pagepos, (ulong) (keypos - buff),
		      (int) used_length));
unknown's avatar
unknown committed
3101
	  DBUG_DUMP("buff",(uchar*) buff,used_length);
3102 3103 3104 3105
	  goto err;
	}
      }
      if (keypos >= endpos ||
3106
	  !(*keyinfo->get_key)(&key, page_flag, nod_flag, &keypos))
3107 3108 3109 3110 3111 3112
	break;
      DBUG_ASSERT(keypos <= endpos);
      if (keyinfo->flag & HA_FULLTEXT)
      {
        uint off;
        int  subkeys;
3113 3114
        get_key_full_length_rdonly(off, key.data);
        subkeys= ft_sintXkorr(key.data + off);
3115 3116
        if (subkeys < 0)
        {
3117 3118
          next_page= _ma_row_pos_from_key(&key);
          _ma_dpointer(share, keypos - nod_flag - share->rec_reflength,
3119
                       param->new_file_pos); /* Save new pos */
3120
          if (sort_one_index(param,info,&share->ft2_keyinfo,
3121 3122 3123 3124 3125 3126 3127 3128
                             next_page,new_file))
            goto err;
        }
      }
    }
  }

  /* Fill block with zero and write it to the new index file */
3129
  length= _ma_get_page_used(share, buff);
unknown's avatar
unknown committed
3130
  bzero((uchar*) buff+length,keyinfo->block_length-length);
unknown's avatar
unknown committed
3131
  put_crc(buff, new_page_pos, share);
unknown's avatar
unknown committed
3132
  if (my_pwrite(new_file,(uchar*) buff,(uint) keyinfo->block_length,
3133 3134 3135 3136 3137
		new_page_pos,MYF(MY_NABP | MY_WAIT_IF_FULL)))
  {
    _ma_check_print_error(param,"Can't write indexblock, error: %d",my_errno);
    goto err;
  }
unknown's avatar
unknown committed
3138
  my_afree((uchar*) buff);
3139 3140
  DBUG_RETURN(0);
err:
unknown's avatar
unknown committed
3141
  my_afree((uchar*) buff);
3142 3143 3144 3145
  DBUG_RETURN(1);
} /* sort_one_index */


unknown's avatar
unknown committed
3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160
/**
   @brief Fill empty space in index file with zeroes

   @return
   @retval 0  Ok
   @retval 1  Error
*/

static my_bool maria_zerofill_index(HA_CHECK *param, MARIA_HA *info,
                                    const char *name)
{
  MARIA_SHARE *share= info->s;
  MARIA_PINNED_PAGE page_link;
  char llbuff[21];
  uchar *buff;
3161
  pgcache_page_no_t page;
unknown's avatar
unknown committed
3162 3163 3164
  my_off_t pos;
  my_off_t key_file_length= share->state.state.key_file_length;
  uint block_size= share->block_size;
3165 3166
  my_bool zero_lsn= (share->base.born_transactional &&
                     !(param->testflag & T_ZEROFILL_KEEP_LSN));
unknown's avatar
unknown committed
3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186
  DBUG_ENTER("maria_zerofill_index");

  if (!(param->testflag & T_SILENT))
    printf("- Zerofilling index for MARIA-table '%s'\n",name);

  /* Go through the index file */
  for (pos= share->base.keystart, page= (ulonglong) (pos / block_size);
       pos < key_file_length;
       pos+= block_size, page++)
  {
    uint length;
    if (!(buff= pagecache_read(share->pagecache,
                               &share->kfile, page,
                               DFLT_INIT_HITS, 0,
                               PAGECACHE_PLAIN_PAGE, PAGECACHE_LOCK_WRITE,
                               &page_link.link)))
    {
      pagecache_unlock_by_link(share->pagecache, page_link.link,
                               PAGECACHE_LOCK_WRITE_UNLOCK,
                               PAGECACHE_UNPIN, LSN_IMPOSSIBLE,
3187
                               LSN_IMPOSSIBLE, 0, FALSE);
unknown's avatar
unknown committed
3188 3189 3190 3191 3192
      _ma_check_print_error(param,
                            "Page %9s: Got error %d when reading index file",
                            llstr(pos, llbuff), my_errno);
      DBUG_RETURN(1);
    }
3193
    if (zero_lsn)
unknown's avatar
unknown committed
3194
      bzero(buff, LSN_SIZE);
3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213

    if (share->base.born_transactional)
    {
      uint keynr= _ma_get_keynr(share, buff);
      if (keynr != MARIA_DELETE_KEY_NR)
      {
        DBUG_ASSERT(keynr < share->base.keys);
        if (_ma_compact_keypage(info, share->keyinfo + keynr, pos,
                                buff, ~(TrID) 0))
        {
          _ma_check_print_error(param,
                                "Page %9s: Got error %d when reading index "
                                "file",
                                llstr(pos, llbuff), my_errno);
          DBUG_RETURN(1);
        }
      }
    }

unknown's avatar
unknown committed
3214
    length= _ma_get_page_used(share, buff);
unknown's avatar
unknown committed
3215 3216 3217
    DBUG_ASSERT(length <= block_size);
    if (length < block_size)
      bzero(buff + length, block_size - length);
unknown's avatar
unknown committed
3218 3219 3220
    pagecache_unlock_by_link(share->pagecache, page_link.link,
                             PAGECACHE_LOCK_WRITE_UNLOCK,
                             PAGECACHE_UNPIN, LSN_IMPOSSIBLE,
3221
                             LSN_IMPOSSIBLE, 1, FALSE);
unknown's avatar
unknown committed
3222 3223 3224 3225 3226 3227 3228 3229 3230
  }
  if (flush_pagecache_blocks(share->pagecache, &share->kfile,
                             FLUSH_FORCE_WRITE))
    DBUG_RETURN(1);
  DBUG_RETURN(0);
}


/**
   @brief Fill empty space in data file with zeroes

   Only BLOCK_RECORD data files are handled; for other formats the
   function returns 0 at once. Bitmap pages are skipped. Each remaining
   page is read with a write lock and treated according to its page type.

   @todo
   Zerofill all pages marked in bitmap as empty and change them to
   be of type UNALLOCATED_PAGE

   @param  param   description of the operation (flags and error reporting)
   @param  info    table
   @param  name    table name, used in the progress message

   @return
   @retval 0  Ok
   @retval 1  Error
*/

static my_bool maria_zerofill_data(HA_CHECK *param, MARIA_HA *info,
                                   const char *name)
{
  MARIA_SHARE *share= info->s;
  MARIA_FILE_BITMAP *bitmap= &share->bitmap;
  MARIA_PINNED_PAGE page_link;
  char llbuff[21];
  pgcache_page_no_t page;
  my_off_t pos;
  uint block_size= share->block_size;
  my_bool zero_lsn= !(param->testflag & T_ZEROFILL_KEEP_LSN);
  /* First byte to clear when a whole page is wiped: 0 also clears the LSN */
  uint wipe_start= zero_lsn ? 0 : LSN_SIZE;
  DBUG_ENTER("maria_zerofill_data");

  /* This works only with BLOCK_RECORD files */
  if (share->data_file_type != BLOCK_RECORD)
    DBUG_RETURN(0);

  if (!(param->testflag & T_SILENT))
    printf("- Zerofilling data  for MARIA-table '%s'\n",name);

  /* Go through the record file */
  for (page= 1, pos= block_size;
       pos < share->state.state.data_file_length;
       pos+= block_size, page++)
  {
    uchar *buff;
    enum en_page_type page_type;

    /* Ignore bitmap pages */
    if ((page % bitmap->pages_covered) == 0)
      continue;

    buff= pagecache_read(share->pagecache,
                         &info->dfile,
                         page, 1, 0,
                         PAGECACHE_PLAIN_PAGE, PAGECACHE_LOCK_WRITE,
                         &page_link.link);
    if (!buff)
    {
      _ma_check_print_error(param,
                            "Page %9s:  Got error: %d when reading datafile",
                            llstr(pos, llbuff), my_errno);
      goto err;
    }

    page_type= (enum en_page_type) (buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK);
    switch (page_type) {
    case UNALLOCATED_PAGE:
      bzero(buff + wipe_start, block_size - wipe_start);
      break;
    case BLOB_PAGE:
      if (_ma_bitmap_get_page_bits(info, bitmap, page) == 0)
      {
        /* Unallocated page */
        bzero(buff + wipe_start, block_size - wipe_start);
      }
      else if (zero_lsn)
        bzero(buff, LSN_SIZE);
      break;
    case HEAD_PAGE:
    case TAIL_PAGE:
    {
      uint max_entry= (uint) buff[DIR_COUNT_OFFSET];

      if (zero_lsn)
        bzero(buff, LSN_SIZE);
      if (max_entry != 0)
      {
        my_bool is_head_page= (page_type == HEAD_PAGE);
        uchar *dir= dir_entry_pos(buff, block_size, max_entry - 1);
        uint used_end, dir_start;

        _ma_compact_block_page(buff, block_size, max_entry -1, 0,
                               is_head_page ? ~(TrID) 0 : 0,
                               is_head_page ?
                               share->base.min_block_length : 0);
        /* compactation may have increased free space */
        if (_ma_bitmap_set(info, page, is_head_page,
                           uint2korr(buff + EMPTY_SPACE_OFFSET)))
          goto err;

        /* Zerofill the not used part */
        used_end= uint2korr(dir) + uint2korr(dir+2);
        dir_start= (uint) (dir - buff);
        DBUG_ASSERT(dir_start >= used_end);
        if (dir_start > used_end)
          bzero(buff + used_end, dir_start - used_end);
      }
      break;
    }
    default:
      _ma_check_print_error(param,
                            "Page %9s:  Found unrecognizable block of type %d",
                            llstr(pos, llbuff), page_type);
      goto err;
    }

    /* Release the page, passing 1 for the 'changed' argument */
    pagecache_unlock_by_link(share->pagecache, page_link.link,
                             PAGECACHE_LOCK_WRITE_UNLOCK,
                             PAGECACHE_UNPIN, LSN_IMPOSSIBLE,
                             LSN_IMPOSSIBLE, 1, FALSE);
  }
  DBUG_RETURN(_ma_bitmap_flush(share) ||
              flush_pagecache_blocks(share->pagecache, &info->dfile,
                                     FLUSH_FORCE_WRITE));

err:
  pagecache_unlock_by_link(share->pagecache, page_link.link,
                           PAGECACHE_LOCK_WRITE_UNLOCK,
                           PAGECACHE_UNPIN, LSN_IMPOSSIBLE,
                           LSN_IMPOSSIBLE, 0, FALSE);
  DBUG_RETURN(1);
}


/**
   @brief Fill empty space in index and data files with zeroes

   Logging is temporarily disabled for a table that is currently
   transactional and re-enabled before returning. On success the table
   state is updated to record the zerofill.

   @param  param   description of the operation
   @param  info    table
   @param  name    table name, used in progress messages

   @return
   @retval 0  Ok
   @retval 1  Error
*/

int maria_zerofill(HA_CHECK *param, MARIA_HA *info, const char *name)
{
  MARIA_SHARE *share= info->s;
  my_bool error;
  my_bool reenable_logging= share->now_transactional;
  my_bool zero_lsn= !(param->testflag & T_ZEROFILL_KEEP_LSN);
  DBUG_ENTER("maria_zerofill");

  if (reenable_logging)
    _ma_tmp_disable_logging_for_table(info, 0);

  /* The steps run in order; the first failing one stops the chain */
  error= (maria_zerofill_index(param, info, name) ||
          maria_zerofill_data(param, info, name) ||
          _ma_set_uuid(info, 0));
  if (!error)
  {
    /*
      Mark that we have done zerofill of data and index. If we zeroed pages'
      LSN, table is movable.
    */
    share->state.changed&= ~STATE_NOT_ZEROFILLED;
    if (zero_lsn)
    {
      share->state.changed&= ~(STATE_NOT_MOVABLE | STATE_MOVED);
      /* Table should get new LSNs */
      share->state.create_rename_lsn= share->state.is_of_horizon=
        share->state.skip_redo_lsn= LSN_NEEDS_NEW_STATE_LSNS;
    }
    /* Ensure state is later flushed to disk, if within maria_chk */
    info->update= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED);

    /* Reset create_trid to make file comparable */
    share->state.create_trid= 0;
  }

  if (reenable_logging)
    _ma_reenable_logging_for_table(info, FALSE);
  DBUG_RETURN(error);
}


/*
  Let temporary file replace old file.
  This assumes that the new file was created in the same
  directory as given by realpath(filename).
  This will ensure that any symlinks that are used will still work.
  Copy stats from old file to new file, deletes orignal and
  changes new file name to old file name
*/
3412 3413

int maria_change_to_newfile(const char * filename, const char * old_ext,
unknown's avatar
unknown committed
3414
                            const char * new_ext, myf MyFlags)
3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431
{
  char old_filename[FN_REFLEN],new_filename[FN_REFLEN];
#ifdef USE_RAID
  if (raid_chunks)
    return my_raid_redel(fn_format(old_filename,filename,"",old_ext,2+4),
			 fn_format(new_filename,filename,"",new_ext,2+4),
			 raid_chunks,
			 MYF(MY_WME | MY_LINK_WARNING | MyFlags));
#endif
  /* Get real path to filename */
  (void) fn_format(old_filename,filename,"",old_ext,2+4+32);
  return my_redel(old_filename,
		  fn_format(new_filename,old_filename,"",new_ext,2+4),
		  MYF(MY_WME | MY_LINK_WARNING | MyFlags));
} /* maria_change_to_newfile */


3432
/* Copy a block between two files */
3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449

int maria_filecopy(HA_CHECK *param, File to,File from,my_off_t start,
	     my_off_t length, const char *type)
{
  char tmp_buff[IO_SIZE],*buff;
  ulong buff_length;
  DBUG_ENTER("maria_filecopy");

  buff_length=(ulong) min(param->write_buffer_length,length);
  if (!(buff=my_malloc(buff_length,MYF(0))))
  {
    buff=tmp_buff; buff_length=IO_SIZE;
  }

  VOID(my_seek(from,start,MY_SEEK_SET,MYF(0)));
  while (length > buff_length)
  {
unknown's avatar
unknown committed
3450 3451
    if (my_read(from,(uchar*) buff,buff_length,MYF(MY_NABP)) ||
	my_write(to,(uchar*) buff,buff_length,param->myf_rw))
3452 3453 3454
      goto err;
    length-= buff_length;
  }
unknown's avatar
unknown committed
3455 3456
  if (my_read(from,(uchar*) buff,(uint) length,MYF(MY_NABP)) ||
      my_write(to,(uchar*) buff,(uint) length,param->myf_rw))
3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485
    goto err;
  if (buff != tmp_buff)
    my_free(buff,MYF(0));
  DBUG_RETURN(0);
err:
  if (buff != tmp_buff)
    my_free(buff,MYF(0));
  _ma_check_print_error(param,"Can't copy %s to tempfile, error %d",
		       type,my_errno);
  DBUG_RETURN(1);
}


/*
  Repair table or given index using sorting

  SYNOPSIS
    maria_repair_by_sort()
    param		Repair parameters
    info		MARIA handler to repair
    name		Name of table (for warnings)
    rep_quick		set to <> 0 if we should not change data file

  RESULT
    0	ok
    <>0	Error
*/

int maria_repair_by_sort(HA_CHECK *param, register MARIA_HA *info,
3486
                         const char * name, my_bool rep_quick)
3487 3488 3489 3490
{
  int got_error;
  uint i;
  ha_rows start_records;
3491
  my_off_t new_header_length, org_header_length, del;
3492 3493
  File new_file;
  MARIA_SORT_PARAM sort_param;
3494
  MARIA_SHARE *share= info->s;
3495
  HA_KEYSEG *keyseg;
3496
  double  *rec_per_key_part;
3497 3498
  char llbuff[22];
  MARIA_SORT_INFO sort_info;
3499
  ulonglong key_map;
unknown's avatar
unknown committed
3500
  myf sync_dir= ((share->now_transactional && !share->temporary) ?
3501
                 MY_SYNC_DIR : 0);
3502
  my_bool scan_inited= 0;
3503
  DBUG_ENTER("maria_repair_by_sort");
3504
  LINT_INIT(key_map);
3505

3506
  got_error= 1;
3507
  new_file= -1;
3508
  start_records= share->state.state.records;
3509 3510 3511 3512 3513 3514
  if (!(param->testflag & T_SILENT))
  {
    printf("- recovering (with sort) MARIA-table '%s'\n",name);
    printf("Data records: %s\n", llstr(start_records,llbuff));
  }

3515 3516
  if (initialize_variables_for_repair(param, &sort_info, &sort_param, info,
                                      rep_quick))
3517 3518
    goto err;

3519 3520 3521 3522
  org_header_length= share->pack.header_length;
  new_header_length= (param->testflag & T_UNPACK) ? 0 : org_header_length;
  sort_param.filepos= new_header_length;

3523 3524 3525
  if (!rep_quick)
  {
    /* Get real path for data file */
unknown's avatar
unknown committed
3526
    if ((new_file=my_create(fn_format(param->temp_filename,
3527
                                      share->data_file_name.str, "",
unknown's avatar
unknown committed
3528 3529 3530
                                      DATA_TMP_EXT, 2+4),
                            0,param->tmpfile_createflag,
                            MYF(0))) < 0)
3531 3532 3533 3534 3535
    {
      _ma_check_print_error(param,"Can't create new tempfile: '%s'",
			   param->temp_filename);
      goto err;
    }
3536
    if (new_header_length &&
unknown's avatar
unknown committed
3537 3538
        maria_filecopy(param, new_file, info->dfile.file, 0L,
                       new_header_length, "datafile-header"))
3539
      goto err;
3540

3541
    share->state.dellink= HA_OFFSET_ERROR;
3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558
    info->rec_cache.file= new_file;             /* For sort_delete_record */
    if (share->data_file_type == BLOCK_RECORD ||
        (param->testflag & T_UNPACK))
    {
      if (create_new_data_handle(&sort_param, new_file))
        goto err;
      sort_info.new_info->rec_cache.file= new_file;
    }
  }

  if (!(sort_info.key_block=
	alloc_key_blocks(param,
			 (uint) param->sort_key_blocks,
			 share->base.max_key_block_length)))
    goto err;
  sort_info.key_block_end=sort_info.key_block+param->sort_key_blocks;

3559
  if (share->data_file_type != BLOCK_RECORD)
3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580
  {
    /* We need a read buffer to read rows in big blocks */
    if (init_io_cache(&param->read_cache, info->dfile.file,
                      (uint) param->read_buffer_length,
                      READ_CACHE, org_header_length, 1, MYF(MY_WME)))
      goto err;
  }
  if (sort_info.new_info->s->data_file_type != BLOCK_RECORD)
  {
    /* When writing to not block records, we need a write buffer */
    if (!rep_quick)
    {
      if (init_io_cache(&sort_info.new_info->rec_cache, new_file,
                        (uint) param->write_buffer_length,
                        WRITE_CACHE, new_header_length, 1,
                        MYF(MY_WME | MY_WAIT_IF_FULL) & param->myf_rw))
        goto err;
      sort_info.new_info->opt_flag|= WRITE_CACHE_USED;
    }
  }

3581 3582 3583
  if (!(sort_param.record=
        (uchar*) my_malloc((size_t) share->base.default_rec_buff_size,
                           MYF(0))) ||
3584
      _ma_alloc_buffer(&sort_param.rec_buff, &sort_param.rec_buff_size,
3585
                       share->base.default_rec_buff_size))
3586 3587 3588
  {
    _ma_check_print_error(param, "Not enough memory for extra record");
    goto err;
3589 3590
  }

3591 3592 3593 3594
  /* Optionally drop indexes and optionally modify the key_map */
  maria_drop_all_indexes(param, info, FALSE);
  key_map= share->state.key_map;
  if (param->testflag & T_CREATE_MISSING_KEYS)
3595
  {
3596 3597
    /* Invert the copied key_map to recreate all disabled indexes. */
    key_map= ~key_map;
3598 3599
  }

3600
  param->read_cache.end_of_file= sort_info.filelength;
3601
  sort_param.wordlist=NULL;
3602
  init_alloc_root(&sort_param.wordroot, FTPARSER_MEMROOT_ALLOC_SIZE, 0);
3603 3604 3605 3606 3607 3608

  sort_param.key_cmp=sort_key_cmp;
  sort_param.lock_in_memory=maria_lock_memory;
  sort_param.tmpdir=param->tmpdir;
  sort_param.master =1;

3609
  del=share->state.state.del;
3610

3611
  rec_per_key_part= param->new_rec_per_key_part;
3612 3613 3614 3615
  for (sort_param.key=0 ; sort_param.key < share->base.keys ;
       rec_per_key_part+=sort_param.keyinfo->keysegs, sort_param.key++)
  {
    sort_param.keyinfo=share->keyinfo+sort_param.key;
3616 3617 3618 3619
    /*
      Skip this index if it is marked disabled in the copied
      (and possibly inverted) key_map.
    */
3620 3621 3622 3623 3624
    if (! maria_is_key_active(key_map, sort_param.key))
    {
      /* Remember old statistics for key */
      memcpy((char*) rec_per_key_part,
	     (char*) (share->state.rec_per_key_part +
3625
		      (uint) (rec_per_key_part - param->new_rec_per_key_part)),
3626
	     sort_param.keyinfo->keysegs*sizeof(*rec_per_key_part));
3627 3628
      DBUG_PRINT("repair", ("skipping seemingly disabled index #: %u",
                            sort_param.key));
3629 3630 3631 3632 3633
      continue;
    }

    if ((!(param->testflag & T_SILENT)))
      printf ("- Fixing index %d\n",sort_param.key+1);
3634 3635 3636

    sort_param.read_cache=param->read_cache;
    sort_param.seg=sort_param.keyinfo->seg;
3637
    sort_param.max_pos= sort_param.pos= org_header_length;
3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650
    keyseg=sort_param.seg;
    bzero((char*) sort_param.unique,sizeof(sort_param.unique));
    sort_param.key_length=share->rec_reflength;
    for (i=0 ; keyseg[i].type != HA_KEYTYPE_END; i++)
    {
      sort_param.key_length+=keyseg[i].length;
      if (keyseg[i].flag & HA_SPACE_PACK)
	sort_param.key_length+=get_pack_length(keyseg[i].length);
      if (keyseg[i].flag & (HA_BLOB_PART | HA_VAR_LENGTH_PART))
	sort_param.key_length+=2 + test(keyseg[i].length >= 127);
      if (keyseg[i].flag & HA_NULL_PART)
	sort_param.key_length++;
    }
3651 3652
    share->state.state.records=share->state.state.del=share->state.split=0;
    share->state.state.empty=0;
3653 3654 3655 3656 3657

    if (sort_param.keyinfo->flag & HA_FULLTEXT)
    {
      uint ft_max_word_len_for_sort=FT_MAX_WORD_LEN_FOR_SORT*
                                    sort_param.keyinfo->seg->charset->mbmaxlen;
3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679
      sort_param.key_length+=ft_max_word_len_for_sort-HA_FT_MAXBYTELEN;
      /*
        fulltext indexes may have much more entries than the
        number of rows in the table. We estimate the number here.

        Note, built-in parser is always nr. 0 - see ftparser_call_initializer()
      */
      if (sort_param.keyinfo->ftparser_nr == 0)
      {
        /*
          for built-in parser the number of generated index entries
          cannot be larger than the size of the data file divided
          by the minimal word's length
        */
        sort_info.max_records=
          (ha_rows) (sort_info.filelength/ft_min_word_len+1);
      }
      else
      {
        /*
          for external plugin parser we cannot tell anything at all :(
          so, we'll use all the sort memory and start from ~10 buffpeks.
3680
          (see _ma_create_index_by_sort)
3681 3682 3683 3684
        */
        sort_info.max_records=
          10*param->sort_buffer_length/sort_param.key_length;
      }
3685

unknown's avatar
unknown committed
3686 3687
      sort_param.key_read=  sort_maria_ft_key_read;
      sort_param.key_write= sort_maria_ft_key_write;
3688 3689 3690
    }
    else
    {
unknown's avatar
unknown committed
3691 3692
      sort_param.key_read=  sort_key_read;
      sort_param.key_write= sort_key_write;
3693 3694
    }

3695 3696 3697 3698 3699 3700
    if (sort_info.new_info->s->data_file_type == BLOCK_RECORD)
    {
      scan_inited= 1;
      if (maria_scan_init(sort_info.info))
        goto err;
    }
3701
    if (_ma_create_index_by_sort(&sort_param,
3702 3703
                                 (my_bool) (!(param->testflag & T_VERBOSE)),
                                 (size_t) param->sort_buffer_length))
3704 3705
    {
      param->retry_repair=1;
unknown's avatar
unknown committed
3706
      _ma_check_print_error(param, "Create index by sort failed");
3707 3708
      goto err;
    }
unknown's avatar
unknown committed
3709 3710 3711 3712 3713 3714 3715 3716
    DBUG_EXECUTE_IF("maria_flush_whole_log",
                    {
                      DBUG_PRINT("maria_flush_whole_log", ("now"));
                      translog_flush(translog_get_horizon());
                    });
    DBUG_EXECUTE_IF("maria_crash_create_index_by_sort",
                    {
                      DBUG_PRINT("maria_crash_create_index_by_sort", ("now"));
3717
                      DBUG_ABORT();
unknown's avatar
unknown committed
3718
                    });
3719 3720 3721 3722 3723 3724
    if (scan_inited)
    {
      scan_inited= 0;
      maria_scan_end(sort_info.info);
    }

3725 3726
    /* No need to calculate checksum again. */
    sort_param.calc_checksum= 0;
3727
    free_root(&sort_param.wordroot, MYF(0));
3728 3729

    /* Set for next loop */
3730
    sort_info.max_records= (ha_rows) sort_info.new_info->s->state.state.records;
3731
    if (param->testflag & T_STATISTICS)
3732 3733
      maria_update_key_parts(sort_param.keyinfo, rec_per_key_part,
                             sort_param.unique,
3734 3735 3736
                             (param->stats_method ==
                              MI_STATS_METHOD_IGNORE_NULLS ?
                              sort_param.notnull : NULL),
3737
                             (ulonglong) share->state.state.records);
3738
    maria_set_key_active(share->state.key_map, sort_param.key);
3739
    DBUG_PRINT("repair", ("set enabled index #: %u", sort_param.key));
3740

unknown's avatar
unknown committed
3741 3742 3743
    if (_ma_flush_table_files_before_swap(param, info))
      goto err;

3744 3745 3746
    if (sort_param.fix_datafile)
    {
      param->read_cache.end_of_file=sort_param.filepos;
3747 3748
      if (maria_write_data_suffix(&sort_info,1) ||
          end_io_cache(&sort_info.new_info->rec_cache))
unknown's avatar
unknown committed
3749 3750
      {
        _ma_check_print_error(param, "Got error when flushing row cache");
3751
	goto err;
unknown's avatar
unknown committed
3752
      }
3753 3754
      sort_info.new_info->opt_flag&= ~WRITE_CACHE_USED;

3755 3756 3757
      if (param->testflag & T_SAFE_REPAIR)
      {
	/* Don't repair if we loosed more than one row */
3758
	if (share->state.state.records+1 < start_records)
3759
	{
unknown's avatar
unknown committed
3760 3761 3762
          _ma_check_print_error(param,
                                "Rows lost; Aborting because safe repair was "
                                "requested");
3763
          share->state.state.records=start_records;
3764 3765 3766
	  goto err;
	}
      }
3767

3768
      sort_info.new_info->s->state.state.data_file_length= sort_param.filepos;
3769 3770 3771 3772 3773 3774 3775 3776
      if (sort_info.new_info != sort_info.info)
      {
        MARIA_STATE_INFO save_state= sort_info.new_info->s->state;
        if (maria_close(sort_info.new_info))
        {
          _ma_check_print_error(param, "Got error %d on close", my_errno);
          goto err;
        }
3777
        copy_data_file_state(&share->state, &save_state);
3778 3779
        new_file= -1;
        sort_info.new_info= info;
unknown's avatar
unknown committed
3780
        info->rec_cache.file= info->dfile.file;
3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791
      }

      share->state.version=(ulong) time((time_t*) 0);	/* Force reopen */

      /* Replace the actual file with the temporary file */
      if (new_file >= 0)
      {
        my_close(new_file, MYF(MY_WME));
        new_file= -1;
      }
      change_data_file_descriptor(info, -1);
3792
      if (maria_change_to_newfile(share->data_file_name.str, MARIA_NAME_DEXT,
3793 3794 3795 3796 3797 3798
                                  DATA_TMP_EXT,
                                  (param->testflag & T_BACKUP_DATA ?
                                   MYF(MY_REDEL_MAKE_BACKUP): MYF(0)) |
                                  sync_dir) ||
          _ma_open_datafile(info, share, -1))
      {
unknown's avatar
unknown committed
3799
        _ma_check_print_error(param, "Couldn't change to new data file");
3800 3801 3802 3803 3804 3805
        goto err;
      }
      if (param->testflag & T_UNPACK)
        restore_data_file_type(share);

      org_header_length= share->pack.header_length;
3806
      sort_info.org_data_file_type= share->data_file_type;
3807
      sort_info.filelength= share->state.state.data_file_length;
3808 3809 3810
      sort_param.fix_datafile=0;
    }
    else
3811
      share->state.state.data_file_length=sort_param.max_pos;
3812

unknown's avatar
unknown committed
3813
    param->read_cache.file= info->dfile.file;	/* re-init read cache */
3814 3815 3816 3817 3818 3819 3820 3821 3822
    reinit_io_cache(&param->read_cache,READ_CACHE,share->pack.header_length,
                    1,1);
  }

  if (param->testflag & T_WRITE_LOOP)
  {
    VOID(fputs("          \r",stdout)); VOID(fflush(stdout));
  }

3823
  if (rep_quick && del+sort_info.dupp != share->state.state.del)
3824 3825 3826 3827 3828 3829 3830 3831 3832
  {
    _ma_check_print_error(param,"Couldn't fix table with quick recovery: Found wrong number of deleted records");
    _ma_check_print_error(param,"Run recovery again without -q");
    got_error=1;
    param->retry_repair=1;
    param->testflag|=T_RETRY_WITHOUT_QUICK;
    goto err;
  }

3833
  if (rep_quick && (param->testflag & T_FORCE_UNIQUENESS))
3834
  {
3835
    my_off_t skr= (share->state.state.data_file_length +
3836 3837
                   (sort_info.org_data_file_type == COMPRESSED_RECORD) ?
                   MEMMAP_EXTRA_MARGIN : 0);
3838
#ifdef USE_RELOC
3839
    if (sort_info.org_data_file_type == STATIC_RECORD &&
3840 3841 3842
	skr < share->base.reloc*share->base.min_pack_length)
      skr=share->base.reloc*share->base.min_pack_length;
#endif
unknown's avatar
unknown committed
3843
    if (skr != sort_info.filelength)
unknown's avatar
unknown committed
3844
      if (my_chsize(info->dfile.file, skr, 0, MYF(0)))
3845 3846 3847 3848
	_ma_check_print_warning(param,
			       "Can't change size of datafile,  error: %d",
			       my_errno);
  }
3849

3850
  if (param->testflag & T_CALC_CHECKSUM)
3851
    share->state.state.checksum=param->glob_crc;
3852

3853
  if (my_chsize(share->kfile.file, share->state.state.key_file_length, 0, MYF(0)))
3854 3855 3856 3857 3858 3859
    _ma_check_print_warning(param,
			   "Can't change size of indexfile, error: %d",
			   my_errno);

  if (!(param->testflag & T_SILENT))
  {
3860 3861
    if (start_records != share->state.state.records)
      printf("Data records: %s\n", llstr(share->state.state.records,llbuff));
3862
  }
unknown's avatar
unknown committed
3863 3864 3865 3866
  if (sort_info.dupp)
    _ma_check_print_warning(param,
                            "%s records have been removed",
                            llstr(sort_info.dupp,llbuff));
3867
  got_error=0;
3868
  /* If invoked by external program that uses thr_lock */
3869
  if (&share->state.state != info->state)
3870
    *info->state= *info->state_start= share->state.state;
3871 3872

err:
3873 3874
  if (scan_inited)
    maria_scan_end(sort_info.info);
3875
  _ma_reset_state(info);
3876 3877

  VOID(end_io_cache(&sort_info.new_info->rec_cache));
unknown's avatar
unknown committed
3878 3879
  VOID(end_io_cache(&param->read_cache));
  info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED);
3880
  sort_info.new_info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED);
3881 3882 3883 3884
  if (got_error)
  {
    if (! param->error_printed)
      _ma_check_print_error(param,"%d when fixing table",my_errno);
unknown's avatar
unknown committed
3885
    (void)_ma_flush_table_files_before_swap(param, info);
3886 3887 3888 3889 3890
    if (sort_info.new_info && sort_info.new_info != sort_info.info)
    {
      unuse_data_file_descriptor(sort_info.new_info);
      maria_close(sort_info.new_info);
    }
3891 3892 3893
    if (new_file >= 0)
    {
      VOID(my_close(new_file,MYF(0)));
unknown's avatar
unknown committed
3894
      VOID(my_delete(param->temp_filename, MYF(MY_WME)));
3895 3896 3897
    }
    maria_mark_crashed_on_repair(info);
  }
3898 3899 3900 3901 3902 3903 3904 3905
  else
  {
    if (key_map == share->state.key_map)
      share->state.changed&= ~STATE_NOT_OPTIMIZED_KEYS;
    /*
      Now that we have flushed and forced everything, we can bump
      create_rename_lsn:
    */
unknown's avatar
unknown committed
3906 3907 3908 3909 3910 3911 3912 3913
    DBUG_EXECUTE_IF("maria_flush_whole_log",
                    {
                      DBUG_PRINT("maria_flush_whole_log", ("now"));
                      translog_flush(translog_get_horizon());
                    });
    DBUG_EXECUTE_IF("maria_crash_repair",
                    {
                      DBUG_PRINT("maria_crash_repair", ("now"));
3914
                      DBUG_ABORT();
unknown's avatar
unknown committed
3915
                    });
3916
  }
unknown's avatar
unknown committed
3917
  share->state.changed|= STATE_NOT_SORTED_PAGES;
unknown's avatar
unknown committed
3918 3919 3920
  if (!rep_quick)
    share->state.changed&= ~(STATE_NOT_OPTIMIZED_ROWS | STATE_NOT_ZEROFILLED |
                             STATE_NOT_MOVABLE);
3921

unknown's avatar
unknown committed
3922
  my_free(sort_param.rec_buff, MYF(MY_ALLOW_ZERO_PTR));
3923
  my_free(sort_param.record,MYF(MY_ALLOW_ZERO_PTR));
unknown's avatar
unknown committed
3924 3925
  my_free((uchar*) sort_info.key_block,MYF(MY_ALLOW_ZERO_PTR));
  my_free((uchar*) sort_info.ft_buf, MYF(MY_ALLOW_ZERO_PTR));
3926 3927 3928 3929
  my_free(sort_info.buff,MYF(MY_ALLOW_ZERO_PTR));
  DBUG_RETURN(got_error);
}

unknown's avatar
unknown committed
3930

3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945
/*
  Threaded repair of table using sorting

  SYNOPSIS
    maria_repair_parallel()
    param		Repair parameters
    info		MARIA handler to repair
    name		Name of table (for warnings)
    rep_quick		set to <> 0 if we should not change data file

  DESCRIPTION
    Same as maria_repair_by_sort but do it multithreaded
    Each key is handled by a separate thread.
    TODO: make a number of threads a parameter

    In parallel repair we use one thread per index. There are two modes:

    Quick

      Only the indexes are rebuilt. All threads share a read buffer.
      Every thread that needs fresh data in the buffer enters the shared
      cache lock. The last thread joining the lock reads the buffer from
      the data file and wakes all other threads.

    Non-quick

      The data file is rebuilt and all indexes are rebuilt to point to
      the new record positions. One thread is the master thread. It
      reads from the old data file and writes to the new data file. It
      also creates one of the indexes. The other threads read from a
      buffer which is filled by the master. If they need fresh data,
      they enter the shared cache lock. If the master's write buffer is
      full, it flushes it to the new data file and enters the shared
      cache lock too. When all threads joined in the lock, the master
      copies its write buffer to the read buffer for the other threads
      and wakes them.

    When compiled without THREAD the call is simply forwarded to
    maria_repair_by_sort().

  RESULT
    0	ok
    <>0	Error
*/

int maria_repair_parallel(HA_CHECK *param, register MARIA_HA *info,
			const char * name, my_bool rep_quick)
{
#ifndef THREAD
  return maria_repair_by_sort(param, info, name, rep_quick);
#else
  int got_error;
  uint i,key, total_key_length, istep;
  ha_rows start_records;
  my_off_t new_header_length,del;
  File new_file;
  MARIA_SORT_PARAM *sort_param=0, tmp_sort_param;
  MARIA_SHARE *share= info->s;
  double  *rec_per_key_part;
  HA_KEYSEG *keyseg;
  char llbuff[22];
  IO_CACHE new_data_cache; /* For non-quick repair. */
  IO_CACHE_SHARE io_share;
  MARIA_SORT_INFO sort_info;
  ulonglong key_map;
  pthread_attr_t thr_attr;
  myf sync_dir= ((share->now_transactional && !share->temporary) ?
                 MY_SYNC_DIR : 0);
  DBUG_ENTER("maria_repair_parallel");
  LINT_INIT(key_map);

  /* Assume failure until everything below has succeeded */
  got_error= 1;
  new_file= -1;
  start_records= share->state.state.records;
  if (!(param->testflag & T_SILENT))
  {
    printf("- parallel recovering (with sort) MARIA-table '%s'\n",name);
    printf("Data records: %s\n", llstr(start_records, llbuff));
  }

  if (initialize_variables_for_repair(param, &sort_info, &tmp_sort_param, info,
                                      rep_quick))
    goto err;

  new_header_length= ((param->testflag & T_UNPACK) ? 0 :
                      share->pack.header_length);

  /*
    Quick repair (not touching data file, rebuilding indexes):
    {
      Read cache is (HA_CHECK *param)->read_cache using info->dfile.file.
    }

    Non-quick repair (rebuilding data file and indexes):
    {
      Master thread:

        Read  cache is (HA_CHECK *param)->read_cache using info->dfile.file.
        Write cache is (MARIA_INFO *info)->rec_cache using new_file.

      Slave threads:

        Read cache is new_data_cache synced to master rec_cache.

      The final assignment of the filedescriptor for rec_cache is done
      after the cache creation.

      Don't check file size on new_data_cache, as the resulting file size
      is not known yet.

      As rec_cache and new_data_cache are synced, write_buffer_length is
      used for the read cache 'new_data_cache'. Both start at the same
      position 'new_header_length'.
    }
  */
  DBUG_PRINT("info", ("is quick repair: %d", (int) rep_quick));

  /*
    Initialize pthread structures before goto err.
    NOTE(review): the 'goto err' after initialize_variables_for_repair()
    above is taken before these two calls, yet the err path destroys the
    mutex and the condition unconditionally - confirm this is harmless.
  */
  pthread_mutex_init(&sort_info.mutex, MY_MUTEX_INIT_FAST);
  pthread_cond_init(&sort_info.cond, 0);

  if (!(sort_info.key_block=
        alloc_key_blocks(param, (uint) param->sort_key_blocks,
                         share->base.max_key_block_length)) ||
      init_io_cache(&param->read_cache, info->dfile.file,
                    (uint) param->read_buffer_length,
                    READ_CACHE, share->pack.header_length, 1, MYF(MY_WME)) ||
      (!rep_quick &&
       (init_io_cache(&info->rec_cache, info->dfile.file,
                      (uint) param->write_buffer_length,
                      WRITE_CACHE, new_header_length, 1,
                      MYF(MY_WME | MY_WAIT_IF_FULL) & param->myf_rw) ||
        init_io_cache(&new_data_cache, -1,
                      (uint) param->write_buffer_length,
                      READ_CACHE, new_header_length, 1,
                      MYF(MY_WME | MY_DONT_CHECK_FILESIZE)))))
    goto err;
  sort_info.key_block_end=sort_info.key_block+param->sort_key_blocks;
  info->opt_flag|=WRITE_CACHE_USED;
  info->rec_cache.file= info->dfile.file;         /* for sort_delete_record */

  if (!rep_quick)
  {
    /* Get real path for data file */
    if ((new_file= my_create(fn_format(param->temp_filename,
                                       share->data_file_name.str, "",
                                       DATA_TMP_EXT,
                                       2+4),
                             0,param->tmpfile_createflag,
                             MYF(0))) < 0)
    {
      _ma_check_print_error(param,"Can't create new tempfile: '%s'",
                            param->temp_filename);
      goto err;
    }
    if (new_header_length &&
        maria_filecopy(param, new_file, info->dfile.file,0L,new_header_length,
                       "datafile-header"))
      goto err;
    if (param->testflag & T_UNPACK)
      restore_data_file_type(share);
    share->state.dellink= HA_OFFSET_ERROR;
    /* From now on the write cache targets the new temporary data file */
    info->rec_cache.file=new_file;
  }

  /* Optionally drop indexes and optionally modify the key_map. */
  maria_drop_all_indexes(param, info, FALSE);
  key_map= share->state.key_map;
  if (param->testflag & T_CREATE_MISSING_KEYS)
  {
    /* Invert the copied key_map to recreate all disabled indexes. */
    key_map= ~key_map;
  }

  param->read_cache.end_of_file= sort_info.filelength;

  /*
    +1 below is required hack for parallel repair mode.
    The share->state.state.records value, that is compared later
    to sort_info.max_records and cannot exceed it, is
    increased in sort_key_write. In maria_repair_by_sort, sort_key_write
    is called after sort_key_read, where the comparison is performed,
    but in parallel mode master thread can call sort_key_write
    before some other repair thread calls sort_key_read.
    Furthermore I'm not even sure +1 would be enough.
    May be sort_info.max_records should be always set to max value in
    parallel mode.
  */
  sort_info.max_records++;

  del=share->state.state.del;

  /*
    One allocation holds the array of per-key sort parameters followed by
    one record buffer of pack_reclength bytes per key (see the assignment
    of sort_param[i].record below).
  */
  if (!(sort_param=(MARIA_SORT_PARAM *)
        my_malloc((uint) share->base.keys *
                  (sizeof(MARIA_SORT_PARAM) + share->base.pack_reclength),
                  MYF(MY_ZEROFILL))))
  {
    _ma_check_print_error(param,"Not enough memory for key!");
    goto err;
  }
  total_key_length=0;
  rec_per_key_part= param->new_rec_per_key_part;
  share->state.state.records=share->state.state.del=share->state.split=0;
  share->state.state.empty=0;

  /*
    'key' walks over all keys of the table, 'i' only over the keys that
    are going to be rebuilt: istep is set to 0 for a skipped key so that
    the sort_param[] slot is reused for the next key.
  */
  for (i=key=0, istep=1 ; key < share->base.keys ;
       rec_per_key_part+=sort_param[i].keyinfo->keysegs, i+=istep, key++)
  {
    sort_param[i].key=key;
    sort_param[i].keyinfo=share->keyinfo+key;
    sort_param[i].seg=sort_param[i].keyinfo->seg;
    /*
      Skip this index if it is marked disabled in the copied
      (and possibly inverted) key_map.
    */
    if (! maria_is_key_active(key_map, key))
    {
      /* Remember old statistics for key */
      memcpy((char*) rec_per_key_part,
             (char*) (share->state.rec_per_key_part+
                      (uint) (rec_per_key_part - param->new_rec_per_key_part)),
             sort_param[i].keyinfo->keysegs*sizeof(*rec_per_key_part));
      istep=0;
      continue;
    }
    istep=1;
    if ((!(param->testflag & T_SILENT)))
      printf ("- Fixing index %d\n",key+1);
    if (sort_param[i].keyinfo->flag & HA_FULLTEXT)
    {
      sort_param[i].key_read=sort_maria_ft_key_read;
      sort_param[i].key_write=sort_maria_ft_key_write;
    }
    else
    {
      sort_param[i].key_read=sort_key_read;
      sort_param[i].key_write=sort_key_write;
    }
    sort_param[i].key_cmp=sort_key_cmp;
    sort_param[i].lock_in_memory=maria_lock_memory;
    sort_param[i].tmpdir=param->tmpdir;
    sort_param[i].sort_info=&sort_info;
    sort_param[i].master=0;
    sort_param[i].fix_datafile=0;
    sort_param[i].calc_checksum= 0;

    sort_param[i].filepos=new_header_length;
    sort_param[i].max_pos=sort_param[i].pos=share->pack.header_length;

    /* Record buffer i lives right behind the sort_param[] array */
    sort_param[i].record= (((uchar *)(sort_param+share->base.keys))+
                          (share->base.pack_reclength * i));
    if (_ma_alloc_buffer(&sort_param[i].rec_buff, &sort_param[i].rec_buff_size,
                         share->base.default_rec_buff_size))
    {
      _ma_check_print_error(param,"Not enough memory!");
      goto err;
    }
    /* Sum up the maximum length of a key of this index */
    sort_param[i].key_length=share->rec_reflength;
    for (keyseg=sort_param[i].seg; keyseg->type != HA_KEYTYPE_END;
         keyseg++)
    {
      sort_param[i].key_length+=keyseg->length;
      if (keyseg->flag & HA_SPACE_PACK)
        sort_param[i].key_length+=get_pack_length(keyseg->length);
      if (keyseg->flag & (HA_BLOB_PART | HA_VAR_LENGTH_PART))
        sort_param[i].key_length+=2 + test(keyseg->length >= 127);
      if (keyseg->flag & HA_NULL_PART)
        sort_param[i].key_length++;
    }
    total_key_length+=sort_param[i].key_length;

    if (sort_param[i].keyinfo->flag & HA_FULLTEXT)
    {
      uint ft_max_word_len_for_sort=FT_MAX_WORD_LEN_FOR_SORT*
                                    sort_param[i].keyinfo->seg->charset->mbmaxlen;
      sort_param[i].key_length+=ft_max_word_len_for_sort-HA_FT_MAXBYTELEN;
      init_alloc_root(&sort_param[i].wordroot, FTPARSER_MEMROOT_ALLOC_SIZE, 0);
    }
  }
  sort_info.total_keys=i;
  /* The first thread is the master; only it may rewrite the data file */
  sort_param[0].master= 1;
  sort_param[0].fix_datafile= ! rep_quick;
  sort_param[0].calc_checksum= test(param->testflag & T_CALC_CHECKSUM);

  sort_info.got_error=0;
  pthread_mutex_lock(&sort_info.mutex);

  /*
    Initialize the I/O cache share for use with the read caches and, in
    case of non-quick repair, the write cache. When all threads join on
    the cache lock, the writer copies the write cache contents to the
    read caches.
  */
  if (i > 1)
  {
    if (rep_quick)
      init_io_cache_share(&param->read_cache, &io_share, NULL, i);
    else
      init_io_cache_share(&new_data_cache, &io_share, &info->rec_cache, i);
  }
  else
    io_share.total_threads= 0; /* share not used */

  (void) pthread_attr_init(&thr_attr);
  (void) pthread_attr_setdetachstate(&thr_attr,PTHREAD_CREATE_DETACHED);

  for (i=0 ; i < sort_info.total_keys ; i++)
  {
    /*
      Copy the properly initialized IO_CACHE structure so that every
      thread has its own copy. In quick mode param->read_cache is shared
      for use by all threads. In non-quick mode all threads but the
      first copy the shared new_data_cache, which is synchronized to the
      write cache of the first thread. The first thread copies
      param->read_cache, which is not shared.
    */
    sort_param[i].read_cache= ((rep_quick || !i) ? param->read_cache :
                               new_data_cache);
    DBUG_PRINT("io_cache_share", ("thread: %u  read_cache: 0x%lx",
                                  i, (long) &sort_param[i].read_cache));

    /*
      two approaches: the same amount of memory for each thread
      or the memory for the same number of keys for each thread...
      In the second one all the threads will fill their sort_buffers
      (and call write_keys) at the same time, putting more stress on i/o.
    */
    sort_param[i].sortbuff_size=
#ifndef USING_SECOND_APPROACH
      param->sort_buffer_length/sort_info.total_keys;
#else
      param->sort_buffer_length*sort_param[i].key_length/total_key_length;
#endif
    if (pthread_create(&sort_param[i].thr, &thr_attr,
                       _ma_thr_find_all_keys,
                       (void *) (sort_param+i)))
    {
      _ma_check_print_error(param,"Cannot start a repair thread");
      /* Cleanup: Detach from the share. Avoid others to be blocked. */
      if (io_share.total_threads)
        remove_io_thread(&sort_param[i].read_cache);
      DBUG_PRINT("error", ("Cannot start a repair thread"));
      sort_info.got_error=1;
    }
    else
      sort_info.threads_running++;
  }
  (void) pthread_attr_destroy(&thr_attr);

  /* waiting for all threads to finish */
  while (sort_info.threads_running)
    pthread_cond_wait(&sort_info.cond, &sort_info.mutex);
  pthread_mutex_unlock(&sort_info.mutex);

  if ((got_error= _ma_thr_write_keys(sort_param)))
  {
    param->retry_repair=1;
    goto err;
  }
  got_error=1;				/* Assume the following may go wrong */

  if (_ma_flush_table_files_before_swap(param, info))
    goto err;

  if (sort_param[0].fix_datafile)
  {
    /*
      Append some nulls to the end of a memory mapped file. Destroy the
      write cache. The master thread did already detach from the share
      by remove_io_thread() in sort.c:thr_find_all_keys().
    */
    if (maria_write_data_suffix(&sort_info,1) ||
        end_io_cache(&info->rec_cache))
      goto err;
    if (param->testflag & T_SAFE_REPAIR)
    {
      /* Don't repair if we lost more than one row */
      if (share->state.state.records+1 < start_records)
      {
        share->state.state.records=start_records;
        goto err;
      }
    }
    share->state.state.data_file_length= sort_param->filepos;
    /* Only whole records */
    share->state.version= (ulong) time((time_t*) 0);
    /*
      Exchange the data file descriptor of the table, so that we use the
      new file from now on.
     */
    my_close(info->dfile.file, MYF(0));
    info->dfile.file= new_file;
    share->pack.header_length=(ulong) new_header_length;
  }
  else
    share->state.state.data_file_length=sort_param->max_pos;

  if (rep_quick && del+sort_info.dupp != share->state.state.del)
  {
    _ma_check_print_error(param,"Couldn't fix table with quick recovery: Found wrong number of deleted records");
    _ma_check_print_error(param,"Run recovery again without -q");
    param->retry_repair=1;
    param->testflag|=T_RETRY_WITHOUT_QUICK;
    goto err;
  }

  if (rep_quick && (param->testflag & T_FORCE_UNIQUENESS))
  {
    /*
      The conditional must be parenthesized: '+' binds tighter than '?:',
      so without the inner parentheses the whole sum would become the
      condition and skr would be either MEMMAP_EXTRA_MARGIN or 0 instead
      of the data file length plus an optional margin.
    */
    my_off_t skr= (share->state.state.data_file_length +
                   ((sort_info.org_data_file_type == COMPRESSED_RECORD) ?
                    MEMMAP_EXTRA_MARGIN : 0));
#ifdef USE_RELOC
    if (sort_info.org_data_file_type == STATIC_RECORD &&
        skr < share->base.reloc*share->base.min_pack_length)
      skr=share->base.reloc*share->base.min_pack_length;
#endif
    if (skr != sort_info.filelength)
      if (my_chsize(info->dfile.file, skr, 0, MYF(0)))
        _ma_check_print_warning(param,
                                "Can't change size of datafile,  error: %d",
                                my_errno);
  }
  if (param->testflag & T_CALC_CHECKSUM)
    share->state.state.checksum=param->glob_crc;

  if (my_chsize(share->kfile.file, share->state.state.key_file_length, 0, MYF(0)))
    _ma_check_print_warning(param,
                            "Can't change size of indexfile, error: %d",
                            my_errno);

  if (!(param->testflag & T_SILENT))
  {
    if (start_records != share->state.state.records)
      printf("Data records: %s\n", llstr(share->state.state.records,llbuff));
  }
  if (sort_info.dupp)
    _ma_check_print_warning(param,
                            "%s records have been removed",
                            llstr(sort_info.dupp,llbuff));
  got_error=0;
  /* If invoked by external program that uses thr_lock */
  if (&share->state.state != info->state)
    *info->state= *info->state_start= share->state.state;

err:
  _ma_reset_state(info);

  /*
    Destroy the write cache. The master thread did already detach from
    the share by remove_io_thread() or it was not yet started (if the
    error happened before creating the thread).
  */
  VOID(end_io_cache(&sort_info.new_info->rec_cache));
  VOID(end_io_cache(&param->read_cache));
  info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED);
  sort_info.new_info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED);
  /*
    Destroy the new data cache in case of non-quick repair. All slave
    threads did either detach from the share by remove_io_thread()
    already or they were not yet started (if the error happened before
    creating the threads).
  */
  if (!rep_quick)
    VOID(end_io_cache(&new_data_cache));
  if (!got_error)
  {
    /* Replace the actual file with the temporary file */
    if (new_file >= 0)
    {
      my_close(new_file,MYF(0));
      info->dfile.file= new_file= -1;
      if (maria_change_to_newfile(share->data_file_name.str, MARIA_NAME_DEXT,
                                  DATA_TMP_EXT,
                                  MYF((param->testflag & T_BACKUP_DATA ?
                                       MY_REDEL_MAKE_BACKUP : 0) |
                                      sync_dir)) ||
          _ma_open_datafile(info,share,-1))
        got_error=1;
    }
  }
  if (got_error)
  {
    if (! param->error_printed)
      _ma_check_print_error(param,"%d when fixing table",my_errno);
    (void)_ma_flush_table_files_before_swap(param, info);
    if (new_file >= 0)
    {
      /* Throw away the half-written temporary data file */
      VOID(my_close(new_file,MYF(0)));
      VOID(my_delete(param->temp_filename, MYF(MY_WME)));
      if (info->dfile.file == new_file)
        info->dfile.file= -1;
    }
    maria_mark_crashed_on_repair(info);
  }
  else if (key_map == share->state.key_map)
    share->state.changed&= ~STATE_NOT_OPTIMIZED_KEYS;
  share->state.changed|= STATE_NOT_SORTED_PAGES;
  if (!rep_quick)
    share->state.changed&= ~(STATE_NOT_OPTIMIZED_ROWS | STATE_NOT_ZEROFILLED |
                             STATE_NOT_MOVABLE);

  pthread_cond_destroy (&sort_info.cond);
  pthread_mutex_destroy(&sort_info.mutex);

  my_free((uchar*) sort_info.ft_buf, MYF(MY_ALLOW_ZERO_PTR));
  my_free((uchar*) sort_info.key_block,MYF(MY_ALLOW_ZERO_PTR));
  my_free((uchar*) sort_param,MYF(MY_ALLOW_ZERO_PTR));
  my_free(sort_info.buff,MYF(MY_ALLOW_ZERO_PTR));
  if (!got_error && (param->testflag & T_UNPACK))
    restore_data_file_type(share);
  DBUG_RETURN(got_error);
#endif /* THREAD */
}

	/* Read next record and return next key */

unknown's avatar
unknown committed
4445
static int sort_key_read(MARIA_SORT_PARAM *sort_param, uchar *key)
{
  int error;
  MARIA_SORT_INFO *sort_info= sort_param->sort_info;
  MARIA_HA *info= sort_info->info;
  MARIA_KEY int_key;
  DBUG_ENTER("sort_key_read");

  /*
    Read the next row, write it to the new data file (if one is being
    built) and store the key for index sort_param->key into 'key'.

    Returns 0 on success, the non-zero result of sort_get_next_record()
    (-1 for end of file, > 0 for error) if no row could be read, and 1 on
    any other failure.
  */
  if ((error= sort_get_next_record(sort_param)))
    DBUG_RETURN(error);

  /* Refuse to go on once the row counter has reached the sort limit */
  if (info->s->state.state.records == sort_info->max_records)
  {
    _ma_check_print_error(sort_info->param,
                          "Key %d - Found too many records; Can't continue",
                          sort_param->key+1);
    DBUG_RETURN(1);
  }
  if (_ma_sort_write_record(sort_param))
    DBUG_RETURN(1);

  /* Build the key through the make_key handler of this index */
  (*info->s->keyinfo[sort_param->key].make_key)(info, &int_key,
                                                sort_param->key, key,
                                                sort_param->record,
                                                sort_param->current_filepos,
                                                0);
  sort_param->real_key_length= int_key.data_length + int_key.ref_length;
#ifdef HAVE_purify
  /*
    Clear the unused tail of the key slot so memory checkers do not see
    uninitialized bytes. The length is an unsigned subtraction, so only
    do it when there really is a tail (same guard as in
    sort_maria_ft_key_read()).
  */
  if (sort_param->key_length > sort_param->real_key_length)
    bzero(key+sort_param->real_key_length,
          (sort_param->key_length-sort_param->real_key_length));
#endif
  DBUG_RETURN(0);
} /* sort_key_read */

unknown's avatar
unknown committed
4478

unknown's avatar
unknown committed
4479
/*
  Return the next full-text key for the sort.

  SYNOPSIS
    sort_maria_ft_key_read()
      sort_param                Information about and for the sort process
      key                       Buffer that receives the key

  NOTES
    One row can produce several keys (one per parsed word). The word list
    of the current row is kept in sort_param->wordlist and the next word
    to hand out in sort_param->wordptr. When the list is exhausted a new
    row is read, written with _ma_sort_write_record() and parsed; rows
    whose first parsed word has a zero 'pos' are skipped.

  RETURN
    0           ok, key is filled in
    != 0        result of sort_get_next_record() / _ma_sort_write_record(),
                or 1 if the row could not be parsed
*/

static int sort_maria_ft_key_read(MARIA_SORT_PARAM *sort_param, uchar *key)
{
  int error= 0;
  MARIA_SORT_INFO *sort_info= sort_param->sort_info;
  MARIA_HA *info= sort_info->info;
  FT_WORD *cur_word= 0;
  FT_WORD *next_word;
  MARIA_KEY int_key;
  DBUG_ENTER("sort_maria_ft_key_read");

  if (sort_param->wordlist)
  {
    /* Still words left from the row read on a previous call */
    cur_word= (FT_WORD*) (sort_param->wordptr);
  }
  else
  {
    /* Fetch rows until one of them yields at least one word */
    do
    {
      free_root(&sort_param->wordroot, MYF(MY_MARK_BLOCKS_FREE));
      error= sort_get_next_record(sort_param);
      if (error)
        DBUG_RETURN(error);
      error= _ma_sort_write_record(sort_param);
      if (error)
        DBUG_RETURN(error);
      cur_word= _ma_ft_parserecord(info, sort_param->key, sort_param->record,
                                   &sort_param->wordroot);
      if (!cur_word)
        DBUG_RETURN(1);
    } while (!cur_word->pos);
    sort_param->wordptr= sort_param->wordlist= cur_word;
  }

  next_word= cur_word + 1;
  _ma_ft_make_key(info, &int_key, sort_param->key, key, cur_word,
                  sort_param->current_filepos);
  sort_param->real_key_length= int_key.data_length + int_key.ref_length;

#ifdef HAVE_purify
  if (sort_param->key_length > sort_param->real_key_length)
    bzero(key+sort_param->real_key_length,
          (sort_param->key_length-sort_param->real_key_length));
#endif

  if (next_word->pos)
  {
    /* More words in this row; remember where to continue */
    sort_param->wordptr= (void*) next_word;
  }
  else
  {
    /* That was the last word; release the list so a new row is read */
    free_root(&sort_param->wordroot, MYF(MY_MARK_BLOCKS_FREE));
    sort_param->wordlist= 0;
  }

  DBUG_RETURN(error);
} /* sort_maria_ft_key_read */


4533 4534 4535 4536 4537 4538 4539
/*
  Read next record from file using parameters in sort_info.

  SYNOPSIS
    sort_get_next_record()
      sort_param                Information about and for the sort process

4540
  NOTES
4541 4542
    Dynamic Records With Non-Quick Parallel Repair

4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555
    For non-quick parallel repair we use a synchronized read/write
    cache. This means that one thread is the master who fixes the data
    file by reading each record from the old data file and writing it
    to the new data file. By doing this the records in the new data
    file are written contiguously. Whenever the write buffer is full,
    it is copied to the read buffer. The slaves read from the read
    buffer, which is not associated with a file. Thus read_cache.file
    is -1. When using _ma_read_cache(), the slaves must always set
    flag to READING_NEXT so that the function never tries to read from
    file. This is safe because the records are contiguous. There is no
    need to read outside the cache. This condition is evaluated in the
    variable 'parallel_flag' for quick reference. read_cache.file must
    be >= 0 in every other case.
4556 4557 4558 4559

  RETURN
    -1          end of file
    0           ok
4560
                sort_param->current_filepos points to record position.
4561
                sort_param->record contains record
4562
                sort_param->max_pos contains position to last byte read
4563 4564
    > 0         error
*/
4565 4566 4567 4568

static int sort_get_next_record(MARIA_SORT_PARAM *sort_param)
{
  int searching;
  int parallel_flag;
  uint found_record,b_type,left_length;
  my_off_t pos;
  MARIA_BLOCK_INFO block_info;
  MARIA_SORT_INFO *sort_info=sort_param->sort_info;
  HA_CHECK *param=sort_info->param;
  MARIA_HA *info=sort_info->info;
  MARIA_SHARE *share= info->s;
  char llbuff[22],llbuff2[22];
  DBUG_ENTER("sort_get_next_record");

  /* Abort at once if the check/repair has been killed */
  if (*_ma_killed_ptr(param))
    DBUG_RETURN(1);

  /*
    One reader per row format of the original data file. Every case is an
    endless loop that only leaves through DBUG_RETURN(), so control never
    falls from one case into the next.
  */
  switch (sort_info->org_data_file_type) {
  case BLOCK_RECORD:
  {
    for (;;)
    {
      int flag;
      /*
        Assume table is transactional and it had LSN pages in the
        cache. Repair has flushed them, left data pages stay in
        cache, and disabled transactionality (so share's current page
        type is PLAIN); page cache would assert if it finds a cached LSN page
        while _ma_scan_block_record() requested a PLAIN page. So we use
        UNKNOWN.
      */
      enum pagecache_page_type save_page_type= share->page_type;
      share->page_type= PAGECACHE_READ_UNKNOWN_PAGE;
      if (info != sort_info->new_info)
      {
        /* Safe scanning */
        flag= _ma_safe_scan_block_record(sort_info, info,
                                         sort_param->record);
      }
      else
      {
        /*
          Scan on clean table.
          It requires a reliable data_file_length so we set it.
        */
        share->state.state.data_file_length= sort_info->filelength;
        info->cur_row.trid= 0;
        flag= _ma_scan_block_record(info, sort_param->record,
                                    info->cur_row.nextpos, 1);
        set_if_bigger(param->max_found_trid, info->cur_row.trid);
        if (info->cur_row.trid > param->max_trid)
        {
          _ma_check_print_not_visible_error(param, info->cur_row.trid);
          flag= HA_ERR_ROW_NOT_VISIBLE;
        }
      }
      share->page_type= save_page_type;
      if (!flag)
      {
        if (sort_param->calc_checksum)
        {
          ha_checksum checksum;
          checksum= (*share->calc_check_checksum)(info, sort_param->record);
          /* Only the low byte of the checksum is compared with the row's */
          if (share->calc_checksum &&
              info->cur_row.checksum != (checksum & 255))
          {
            if (param->testflag & T_VERBOSE)
            {
              record_pos_to_txt(info, info->cur_row.lastpos, llbuff);
              _ma_check_print_info(param,
                                   "Found record with wrong checksum at %s",
                                   llbuff);
            }
            continue;                           /* Skip row, try next one */
          }
          info->cur_row.checksum= checksum;
          param->glob_crc+= checksum;
        }
        sort_param->start_recpos= sort_param->current_filepos=
          info->cur_row.lastpos;
        DBUG_RETURN(0);
      }
      if (flag == HA_ERR_END_OF_FILE)
      {
        sort_param->max_pos= share->state.state.data_file_length;
        DBUG_RETURN(-1);
      }
      /* Retry only if wrong record, not if disk error */
      if (flag != HA_ERR_WRONG_IN_RECORD)
      {
        retry_if_quick(sort_param, flag);
        DBUG_RETURN(flag);
      }
    }
    break;                                      /* Impossible */
  }
  case STATIC_RECORD:
    for (;;)
    {
      if (my_b_read(&sort_param->read_cache,sort_param->record,
                    share->base.pack_reclength))
      {
        if (sort_param->read_cache.error)
          param->out_flag |= O_DATA_LOST;
        retry_if_quick(sort_param, my_errno);
        DBUG_RETURN(-1);
      }
      sort_param->start_recpos=sort_param->pos;
      if (!sort_param->fix_datafile)
      {
        sort_param->current_filepos= sort_param->pos;
        if (sort_param->master)
          share->state.split++;
      }
      sort_param->max_pos=(sort_param->pos+=share->base.pack_reclength);
      /* A zero first byte is counted as a deleted row below */
      if (*sort_param->record)
      {
        if (sort_param->calc_checksum)
          param->glob_crc+= (info->cur_row.checksum=
                             _ma_static_checksum(info,sort_param->record));
        DBUG_RETURN(0);
      }
      if (!sort_param->fix_datafile && sort_param->master)
      {
        share->state.state.del++;
        share->state.state.empty+=share->base.pack_reclength;
      }
    }
  case DYNAMIC_RECORD:
  {
    uchar *to;
    ha_checksum checksum= 0;
    LINT_INIT(to);

    pos=sort_param->pos;
    searching=(sort_param->fix_datafile && (param->testflag & T_EXTEND));
    /* No file behind the read cache: force READING_NEXT on every read */
    parallel_flag= (sort_param->read_cache.file < 0) ? READING_NEXT : 0;
    for (;;)
    {
      /*
        Start assembling a new row. left_length is set to 1 so that the
        'continue' statements inside the do-loop below (which jump to the
        loop condition) keep looping until a first block has been found.
      */
      found_record=block_info.second_read= 0;
      left_length=1;
      if (searching)
      {
        pos=MY_ALIGN(pos,MARIA_DYN_ALIGN_SIZE);
        param->testflag|=T_RETRY_WITHOUT_QUICK;
        sort_param->start_recpos=pos;
      }
      do
      {
        if (pos > sort_param->max_pos)
          sort_param->max_pos=pos;
        if (pos & (MARIA_DYN_ALIGN_SIZE-1))
        {
          if ((param->testflag & T_VERBOSE) || searching == 0)
            _ma_check_print_info(param,"Wrong aligned block at %s",
                                 llstr(pos,llbuff));
          if (searching)
            goto try_next;
        }
        if (found_record && pos == param->search_after_block)
          _ma_check_print_info(param,"Block: %s used by record at %s",
                               llstr(param->search_after_block,llbuff),
                               llstr(sort_param->start_recpos,llbuff2));
        if (_ma_read_cache(&sort_param->read_cache,
                           (uchar*) block_info.header,pos,
                           MARIA_BLOCK_INFO_HEADER_LENGTH,
                           (! found_record ? READING_NEXT : 0) |
                           parallel_flag | READING_HEADER))
        {
          if (found_record)
          {
            /*
              Report my_errno, as every other I/O failure in this
              function does, instead of the raw C errno.
            */
            _ma_check_print_info(param,
                                 "Can't read whole record at %s (errno: %d)",
                                 llstr(sort_param->start_recpos,llbuff),
                                 my_errno);
            goto try_next;
          }
          DBUG_RETURN(-1);
        }
        if (searching && ! sort_param->fix_datafile)
        {
          param->error_printed=1;
          param->retry_repair=1;
          param->testflag|=T_RETRY_WITHOUT_QUICK;
          DBUG_RETURN(1);       /* Something wrong with data */
        }
        b_type= _ma_get_block_info(&block_info,-1,pos);
        if ((b_type & (BLOCK_ERROR | BLOCK_FATAL_ERROR)) ||
           ((b_type & BLOCK_FIRST) &&
             (block_info.rec_len < (uint) share->base.min_pack_length ||
              block_info.rec_len > (uint) share->base.max_pack_length)))
        {
          uint i;
          if (param->testflag & T_VERBOSE || searching == 0)
            _ma_check_print_info(param,
                                 "Wrong bytesec: %3d-%3d-%3d at %10s; Skipped",
                                 block_info.header[0],block_info.header[1],
                                 block_info.header[2],llstr(pos,llbuff));
          if (found_record)
            goto try_next;
          block_info.second_read=0;
          searching=1;
          /* Search after block in read header string */
          for (i=MARIA_DYN_ALIGN_SIZE ;
               i < MARIA_BLOCK_INFO_HEADER_LENGTH ;
               i+= MARIA_DYN_ALIGN_SIZE)
            if (block_info.header[i] >= 1 &&
                block_info.header[i] <= MARIA_MAX_DYN_HEADER_BYTE)
              break;
          pos+=(ulong) i;
          sort_param->start_recpos=pos;
          continue;
        }
        if (b_type & BLOCK_DELETED)
        {
          my_bool error=0;
          if (block_info.block_len+ (uint) (block_info.filepos-pos) <
              share->base.min_block_length)
          {
            if (!searching)
              _ma_check_print_info(param,
                                   "Deleted block with impossible length %lu "
                                   "at %s",
                                   block_info.block_len,llstr(pos,llbuff));
            error=1;
          }
          else
          {
            if ((block_info.next_filepos != HA_OFFSET_ERROR &&
                 block_info.next_filepos >=
                 share->state.state.data_file_length) ||
                (block_info.prev_filepos != HA_OFFSET_ERROR &&
                 block_info.prev_filepos >= share->state.state.data_file_length))
            {
              if (!searching)
                _ma_check_print_info(param,
                                     "Delete link points outside datafile at %s",
                                     llstr(pos,llbuff));
              error=1;
            }
          }
          if (error)
          {
            if (found_record)
              goto try_next;
            searching=1;
            pos+= MARIA_DYN_ALIGN_SIZE;
            sort_param->start_recpos=pos;
            block_info.second_read=0;
            continue;
          }
        }
        else
        {
          if (block_info.block_len+ (uint) (block_info.filepos-pos) <
              share->base.min_block_length ||
              block_info.block_len > (uint) share->base.max_pack_length+
              MARIA_SPLIT_LENGTH)
          {
            if (!searching)
              _ma_check_print_info(param,
                                   "Found block with impossible length %lu "
                                   "at %s; Skipped",
                                   block_info.block_len+
                                   (uint) (block_info.filepos-pos),
                                   llstr(pos,llbuff));
            if (found_record)
              goto try_next;
            searching=1;
            pos+= MARIA_DYN_ALIGN_SIZE;
            sort_param->start_recpos=pos;
            block_info.second_read=0;
            continue;
          }
        }
        if (b_type & (BLOCK_DELETED | BLOCK_SYNC_ERROR))
        {
          /* Only the master updates the shared statistics */
          if (!sort_param->fix_datafile && sort_param->master &&
              (b_type & BLOCK_DELETED))
          {
            share->state.state.empty+=block_info.block_len;
            share->state.state.del++;
            share->state.split++;
          }
          if (found_record)
            goto try_next;
          if (searching)
          {
            pos+=MARIA_DYN_ALIGN_SIZE;
            sort_param->start_recpos=pos;
          }
          else
            pos=block_info.filepos+block_info.block_len;
          block_info.second_read=0;
          continue;
        }

        if (!sort_param->fix_datafile && sort_param->master)
          share->state.split++;
        if (! found_record++)
        {
          /* First block of a row: set up lengths, positions and buffer */
          sort_param->find_length=left_length=block_info.rec_len;
          sort_param->start_recpos=pos;
          if (!sort_param->fix_datafile)
            sort_param->current_filepos= sort_param->start_recpos;
          if (sort_param->fix_datafile && (param->testflag & T_EXTEND))
            sort_param->pos=block_info.filepos+1;
          else
            sort_param->pos=block_info.filepos+block_info.block_len;
          if (share->base.blobs)
          {
            if (_ma_alloc_buffer(&sort_param->rec_buff,
                                 &sort_param->rec_buff_size,
                                 block_info.rec_len +
                                 share->base.extra_rec_buff_size))
            {
              if (param->max_record_length >= block_info.rec_len)
              {
                _ma_check_print_error(param,"Not enough memory for blob at %s (need %lu)",
                                      llstr(sort_param->start_recpos,llbuff),
                                      (ulong) block_info.rec_len);
                DBUG_RETURN(1);
              }
              else
              {
                _ma_check_print_info(param,"Not enough memory for blob at %s (need %lu); Row skipped",
                                     llstr(sort_param->start_recpos,llbuff),
                                     (ulong) block_info.rec_len);
                goto try_next;
              }
            }
          }
          to= sort_param->rec_buff;
        }
        if (left_length < block_info.data_len || ! block_info.data_len)
        {
          _ma_check_print_info(param,
                               "Found block with too small length at %s; "
                               "Skipped",
                               llstr(sort_param->start_recpos,llbuff));
          goto try_next;
        }
        if (block_info.filepos + block_info.data_len >
            sort_param->read_cache.end_of_file)
        {
          _ma_check_print_info(param,
                               "Found block that points outside data file "
                               "at %s",
                               llstr(sort_param->start_recpos,llbuff));
          goto try_next;
        }
        /*
          Copy information that is already read. Avoid accessing data
          below the cache start. This could happen if the header
          stretched over the end of the previous buffer contents.
        */
        {
          uint header_len= (uint) (block_info.filepos - pos);
          uint prefetch_len= (MARIA_BLOCK_INFO_HEADER_LENGTH - header_len);

          if (prefetch_len > block_info.data_len)
            prefetch_len= block_info.data_len;
          if (prefetch_len)
          {
            memcpy(to, block_info.header + header_len, prefetch_len);
            block_info.filepos+= prefetch_len;
            block_info.data_len-= prefetch_len;
            left_length-= prefetch_len;
            to+= prefetch_len;
          }
        }
        if (block_info.data_len &&
            _ma_read_cache(&sort_param->read_cache,to,block_info.filepos,
                           block_info.data_len,
                           (found_record == 1 ? READING_NEXT : 0) |
                           parallel_flag))
        {
          _ma_check_print_info(param,
                               "Read error for block at: %s (error: %d); Skipped",
                               llstr(block_info.filepos,llbuff),my_errno);
          goto try_next;
        }
        left_length-=block_info.data_len;
        to+=block_info.data_len;
        pos=block_info.next_filepos;
        if (pos == HA_OFFSET_ERROR && left_length)
        {
          _ma_check_print_info(param,"Wrong block with wrong total length starting at %s",
                               llstr(sort_param->start_recpos,llbuff));
          goto try_next;
        }
        if (pos + MARIA_BLOCK_INFO_HEADER_LENGTH > sort_param->read_cache.end_of_file)
        {
          _ma_check_print_info(param,"Found link that points at %s (outside data file) at %s",
                               llstr(pos,llbuff2),
                               llstr(sort_param->start_recpos,llbuff));
          goto try_next;
        }
      } while (left_length);

      /* All parts collected in rec_buff; unpack into sort_param->record */
      if (_ma_rec_unpack(info,sort_param->record,sort_param->rec_buff,
                         sort_param->find_length) != MY_FILE_ERROR)
      {
        if (sort_param->read_cache.error < 0)
          DBUG_RETURN(1);
        if (sort_param->calc_checksum)
          checksum= (share->calc_check_checksum)(info, sort_param->record);
        if ((param->testflag & (T_EXTEND | T_REP)) || searching)
        {
          if (_ma_rec_check(info, sort_param->record, sort_param->rec_buff,
                            sort_param->find_length,
                            (param->testflag & T_QUICK) &&
                            sort_param->calc_checksum &&
                            test(share->calc_checksum), checksum))
          {
            _ma_check_print_info(param,"Found wrong packed record at %s",
                                 llstr(sort_param->start_recpos,llbuff));
            goto try_next;
          }
        }
        if (sort_param->calc_checksum)
          param->glob_crc+= checksum;
        DBUG_RETURN(0);
      }
      if (!searching)
        _ma_check_print_info(param,"Key %d - Found wrong stored record at %s",
                             sort_param->key+1,
                             llstr(sort_param->start_recpos,llbuff));
    try_next:
      /* Give up on this start position; probe the next aligned one */
      pos=(sort_param->start_recpos+=MARIA_DYN_ALIGN_SIZE);
      searching=1;
    }
  }
  case COMPRESSED_RECORD:
    /* After a failed attempt, retry one byte further in search mode */
    for (searching=0 ;; searching=1, sort_param->pos++)
    {
      if (_ma_read_cache(&sort_param->read_cache,(uchar*) block_info.header,
                         sort_param->pos,
                         share->pack.ref_length,READING_NEXT))
        DBUG_RETURN(-1);
      if (searching && ! sort_param->fix_datafile)
      {
        param->error_printed=1;
        param->retry_repair=1;
        param->testflag|=T_RETRY_WITHOUT_QUICK;
        DBUG_RETURN(1);         /* Something wrong with data */
      }
      sort_param->start_recpos=sort_param->pos;
      if (_ma_pack_get_block_info(info, &sort_param->bit_buff, &block_info,
                                  &sort_param->rec_buff,
                                  &sort_param->rec_buff_size, -1,
                                  sort_param->pos))
        DBUG_RETURN(-1);
      if (!block_info.rec_len &&
          sort_param->pos + MEMMAP_EXTRA_MARGIN ==
          sort_param->read_cache.end_of_file)
        DBUG_RETURN(-1);
      if (block_info.rec_len < (uint) share->min_pack_length ||
          block_info.rec_len > (uint) share->max_pack_length)
      {
        if (! searching)
          _ma_check_print_info(param,
                               "Found block with wrong recordlength: %lu "
                               "at %s\n",
                               block_info.rec_len,
                               llstr(sort_param->pos,llbuff));
        continue;
      }
      if (_ma_read_cache(&sort_param->read_cache,(uchar*) sort_param->rec_buff,
                         block_info.filepos, block_info.rec_len,
                         READING_NEXT))
      {
        if (! searching)
          _ma_check_print_info(param,"Couldn't read whole record from %s",
                               llstr(sort_param->pos,llbuff));
        continue;
      }
#ifdef HAVE_purify
      bzero(sort_param->rec_buff + block_info.rec_len,
            share->base.extra_rec_buff_size);
#endif
      if (_ma_pack_rec_unpack(info, &sort_param->bit_buff, sort_param->record,
                              sort_param->rec_buff, block_info.rec_len))
      {
        if (! searching)
          _ma_check_print_info(param,"Found wrong record at %s",
                               llstr(sort_param->pos,llbuff));
        continue;
      }
      if (!sort_param->fix_datafile)
      {
        sort_param->current_filepos= sort_param->pos;
        if (sort_param->master)
          share->state.split++;
      }
      sort_param->max_pos= (sort_param->pos=block_info.filepos+
                            block_info.rec_len);
      /* _ma_sort_write_record() reads packed_length for compressed rows */
      info->packed_length=block_info.rec_len;

      if (sort_param->calc_checksum)
      {
        info->cur_row.checksum= (*share->calc_check_checksum)(info,
                                                              sort_param->
                                                              record);
        param->glob_crc+= info->cur_row.checksum;
      }
      DBUG_RETURN(0);
    }
  }
  DBUG_RETURN(1);               /* Impossible */
}


5079 5080
/**
   @brief Write record to new file.
5081

5082 5083
   @fn    _ma_sort_write_record()
   @param sort_param                Sort parameters.
5084

5085 5086
   @note
   This is only called by a master thread if parallel repair is used.
5087

5088 5089 5090 5091 5092 5093 5094
   @return
   @retval  0   OK
                sort_param->current_filepos points to inserted record for
                block_records and to the place for the next record for
                other row types.
                sort_param->filepos points to end of file
  @retval   1   Error
5095
*/
5096 5097 5098 5099 5100 5101

int _ma_sort_write_record(MARIA_SORT_PARAM *sort_param)
{
  int flag;
  uint length;
  ulong block_length,reclength;
  uchar *from;
  /*
    NOTE(review): up to two pack lengths are stored here for compressed
    rows; confirm that _ma_save_pack_length() can never emit more than
    4 bytes per call.
  */
  uchar block_buff[8];
  MARIA_SORT_INFO *sort_info=sort_param->sort_info;
  HA_CHECK *param= sort_info->param;
  MARIA_HA *info= sort_info->new_info;
  MARIA_SHARE *share= info->s;
  DBUG_ENTER("_ma_sort_write_record");

  /* Rows are only copied when a new data file is being built */
  if (sort_param->fix_datafile)
  {
    sort_param->current_filepos= sort_param->filepos;
    switch (sort_info->new_data_file_type) {
    case BLOCK_RECORD:
      if ((sort_param->current_filepos=
           (*share->write_record_init)(info, sort_param->record)) ==
          HA_OFFSET_ERROR)
        DBUG_RETURN(1);
      /* Pointer to end of file */
      sort_param->filepos= share->state.state.data_file_length;
      break;
    case STATIC_RECORD:
      if (my_b_write(&info->rec_cache,sort_param->record,
                     share->base.pack_reclength))
      {
        _ma_check_print_error(param,"%d when writing to datafile",my_errno);
        DBUG_RETURN(1);
      }
      sort_param->filepos+=share->base.pack_reclength;
      share->state.split++;
      break;
    case DYNAMIC_RECORD:
      if (! info->blobs)
        from=sort_param->rec_buff;
      else
      {
        /* must be sure that local buffer is big enough */
        reclength=share->base.pack_reclength+
          _ma_calc_total_blob_length(info,sort_param->record)+
          ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER)+MARIA_SPLIT_LENGTH+
          MARIA_DYN_DELETE_BLOCK_HEADER;
        if (sort_info->buff_length < reclength)
        {
          /*
            Pass the full length: a cast to uint could truncate the
            allocation for very large rows while buff_length below would
            still record the untruncated size.
          */
          if (!(sort_info->buff=my_realloc(sort_info->buff,
                                           (size_t) reclength,
                                           MYF(MY_FREE_ON_ERROR |
                                               MY_ALLOW_ZERO_PTR))))
          {
            /* buff is NULL now; don't leave a stale length behind */
            sort_info->buff_length= 0;
            DBUG_RETURN(1);
          }
          sort_info->buff_length=reclength;
        }
        from= (uchar *) sort_info->buff+ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER);
      }
      /* We can use info->checksum here as only one thread calls this */
      info->cur_row.checksum= (*share->calc_check_checksum)(info,
                                                            sort_param->
                                                            record);
      reclength= _ma_rec_pack(info,from,sort_param->record);
      flag=0;

      /* Write the packed row as one or more blocks at the end of file */
      do
      {
        block_length=reclength+ 3 + test(reclength >= (65520-3));
        if (block_length < share->base.min_block_length)
          block_length=share->base.min_block_length;
        info->update|=HA_STATE_WRITE_AT_END;
        block_length=MY_ALIGN(block_length,MARIA_DYN_ALIGN_SIZE);
        if (block_length > MARIA_MAX_BLOCK_LENGTH)
          block_length=MARIA_MAX_BLOCK_LENGTH;
        if (_ma_write_part_record(info,0L,block_length,
                                  sort_param->filepos+block_length,
                                  &from,&reclength,&flag))
        {
          _ma_check_print_error(param,"%d when writing to datafile",my_errno);
          DBUG_RETURN(1);
        }
        sort_param->filepos+=block_length;
        share->state.split++;
      } while (reclength);
      break;
    case COMPRESSED_RECORD:
      /* Row is already packed in rec_buff; prefix it with its length(s) */
      reclength=info->packed_length;
      length= _ma_save_pack_length((uint) share->pack.version, block_buff,
                                   reclength);
      if (share->base.blobs)
        length+= _ma_save_pack_length((uint) share->pack.version,
                                      block_buff + length, info->blob_length);
      if (my_b_write(&info->rec_cache,block_buff,length) ||
          my_b_write(&info->rec_cache,(uchar*) sort_param->rec_buff,reclength))
      {
        _ma_check_print_error(param,"%d when writing to datafile",my_errno);
        DBUG_RETURN(1);
      }
      sort_param->filepos+=reclength+length;
      share->state.split++;
      break;
    }
  }
  if (sort_param->master)
  {
    /* Only the master counts rows and prints the progress counter */
    share->state.state.records++;
    if ((param->testflag & T_WRITE_LOOP) &&
        (share->state.state.records % WRITE_COUNT) == 0)
    {
      char llbuff[22];
      printf("%s\r", llstr(share->state.state.records,llbuff));
      VOID(fflush(stdout));
    }
  }
  DBUG_RETURN(0);
} /* _ma_sort_write_record */


5212
/* Compare two keys from _ma_create_index_by_sort */
5213 5214 5215 5216 5217

static int sort_key_cmp(MARIA_SORT_PARAM *sort_param, const void *a,
                        const void *b)
{
  /* 'a' and 'b' each point at a pointer to the key to compare */
  uchar *const *left_key= (uchar* const *) a;
  uchar *const *right_key= (uchar* const *) b;
  uint diff_pos_unused[2];                      /* Required by ha_key_cmp */

  return ha_key_cmp(sort_param->seg, *left_key, *right_key,
                    USE_WHOLE_KEY, SEARCH_SAME, diff_pos_unused);
} /* sort_key_cmp */


unknown's avatar
unknown committed
5224
/*
  Add one sorted key to the index being built.

  SYNOPSIS
    sort_key_write()
      sort_param                Information about and for the sort process
      a                         Key to write

  NOTES
    The key is compared with the last key of the first key block to
    update the per-key-part statistics in sort_param->unique[] and to
    detect duplicates. A duplicate on a HA_NOSAME key is reported and
    handed to sort_delete_record() instead of being inserted.

  RETURN
    Result of sort_delete_record() for a duplicate, otherwise result of
    sort_insert_key(). In debug builds 1 if the keys arrive out of order.
*/

static int sort_key_write(MARIA_SORT_PARAM *sort_param, const uchar *a)
{
  int cmp= -1;                          /* Used when there is no last key */
  uint diff_pos[2];
  char llbuff[22],llbuff2[22];
  MARIA_SORT_INFO *sort_info= sort_param->sort_info;
  HA_CHECK *param= sort_info->param;
  SORT_KEY_BLOCKS *key_block= sort_info->key_block;

  if (!key_block->inited)
  {
    /* Very first key: nothing to compare against yet */
    if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS)
      maria_collect_stats_nonulls_first(sort_param->seg, sort_param->notnull,
                                        a);
  }
  else
  {
    cmp= ha_key_cmp(sort_param->seg, (uchar*) key_block->lastkey,
                    a, USE_WHOLE_KEY,
                    SEARCH_FIND | SEARCH_UPDATE | SEARCH_INSERT,
                    diff_pos);
    if (param->stats_method == MI_STATS_METHOD_NULLS_NOT_EQUAL)
    {
      /* Called only to recompute diff_pos; the result is not used */
      ha_key_cmp(sort_param->seg, (uchar*) key_block->lastkey,
                 a, USE_WHOLE_KEY,
                 SEARCH_FIND | SEARCH_NULL_ARE_NOT_EQUAL, diff_pos);
    }
    else if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS)
    {
      diff_pos[0]= maria_collect_stats_nonulls_next(sort_param->seg,
                                                    sort_param->notnull,
                                                    key_block->lastkey,
                                                    a);
    }
    sort_param->unique[diff_pos[0]-1]++;
  }

  if (cmp == 0 && (sort_param->keyinfo->flag & HA_NOSAME))
  {
    /* Duplicate on a unique key: report it and drop the row */
    sort_info->dupp++;
    sort_info->info->cur_row.lastpos= get_record_for_key(sort_param->keyinfo,
                                                         a);
    _ma_check_print_warning(param,
                            "Duplicate key %2u for record at %10s against record at %10s",
                            sort_param->key + 1,
                            llstr(sort_info->info->cur_row.lastpos, llbuff),
                            llstr(get_record_for_key(sort_param->keyinfo,
                                                     key_block->lastkey),
                                  llbuff2));
    param->testflag|= T_RETRY_WITHOUT_QUICK;
    if (param->testflag & T_VERBOSE)
      _ma_print_keydata(stdout, sort_param->seg, a, USE_WHOLE_KEY);
    return sort_delete_record(sort_param);
  }
#ifndef DBUG_OFF
  if (cmp > 0)
  {
    _ma_check_print_error(param,
                          "Internal error: Keys are not in order from sort");
    return 1;
  }
#endif
  return sort_insert_key(sort_param, key_block, a, HA_OFFSET_ERROR);
} /* sort_key_write */

unknown's avatar
unknown committed
5288

5289 5290 5291 5292 5293 5294 5295 5296
/*
  Flush the full-text word currently held in sort_info->ft_buf

  SYNOPSIS
    _ma_sort_ft_buf_flush()
      sort_param        Sort state; sort_info->ft_buf holds the word key
                        (lastkey) and possibly buffered values after it

  DESCRIPTION
    If ft_buf->buf is set, the values for the word are still buffered in
    memory right after lastkey: lastkey is inserted as it is, then every
    following value is copied into the value slot behind the word and the
    resulting key is inserted into the first-level tree.

    If ft_buf->buf is not set, the values were written to a second-level
    tree. Its pending blocks are flushed, the value slot of lastkey is
    overwritten with the negated value count followed by the key root that
    _ma_flush_pending_blocks() stored, the first-level key_block / keyinfo /
    key_root are restored and lastkey is inserted into the first-level tree.

  RETURN
    0      ok
    #      error from sort_insert_key() or _ma_flush_pending_blocks()
*/

int _ma_sort_ft_buf_flush(MARIA_SORT_PARAM *sort_param)
{
  MARIA_SORT_INFO *sinfo= sort_param->sort_info;
  MARIA_SHARE *share= sinfo->info->s;
  SORT_FT_BUF *ft= sinfo->ft_buf;
  SORT_KEY_BLOCKS *first_block= sinfo->key_block;
  uchar *value_slot, *src;
  uint word_len, value_len;
  int res;

  value_len= share->ft2_keyinfo.keylength;
  get_key_full_length_rdonly(word_len, ft->lastkey);
  value_slot= ft->lastkey + word_len;

  if (!ft->buf)
  {
    /* Values live in a second-level tree: finish that tree first */
    res= _ma_flush_pending_blocks(sort_param);

    /* Replace the value slot with (-count, second-level key root) */
    ft_intXstore(value_slot, -ft->count);
    _ma_dpointer(share, value_slot + HA_FT_WLEN,
                 share->state.key_root[sort_param->key]);

    /* Switch the sort state back to the first-level tree */
    sinfo->key_block= sinfo->key_block_end - sinfo->param->sort_key_blocks;
    sort_param->keyinfo= share->keyinfo + sort_param->key;
    share->state.key_root[sort_param->key]= HA_OFFSET_ERROR;

    /* The state is restored even on error; only the final insert is skipped */
    if (res)
      return res;
    return sort_insert_key(sort_param, sinfo->key_block, ft->lastkey,
                           HA_OFFSET_ERROR);
  }

  /* Values are buffered in memory: emit one first-level key per value */
  res= sort_insert_key(sort_param, first_block, ft->lastkey, HA_OFFSET_ERROR);
  src= value_slot + value_len;
  while (!res && src < ft->buf)
  {
    memcpy(value_slot, src, value_len);
    res= sort_insert_key(sort_param, first_block, ft->lastkey,
                         HA_OFFSET_ERROR);
    src+= value_len;
  }
  return res;
}

unknown's avatar
unknown committed
5334 5335

static int sort_maria_ft_key_write(MARIA_SORT_PARAM *sort_param,
unknown's avatar
unknown committed
5336
                                   const uchar *a)
5337 5338 5339 5340 5341
{
  uint a_len, val_off, val_len, error;
  MARIA_SORT_INFO *sort_info= sort_param->sort_info;
  SORT_FT_BUF *ft_buf= sort_info->ft_buf;
  SORT_KEY_BLOCKS *key_block= sort_info->key_block;
5342
  MARIA_SHARE *share= sort_info->info->s;
5343

5344
  val_len=HA_FT_WLEN+share->base.rec_reflength;
5345
  get_key_full_length_rdonly(a_len, a);
5346 5347 5348 5349 5350 5351 5352

  if (!ft_buf)
  {
    /*
      use two-level tree only if key_reflength fits in rec_reflength place
      and row format is NOT static - for _ma_dpointer not to garble offsets
     */
5353 5354 5355
    if ((share->base.key_reflength <=
         share->base.rec_reflength) &&
        (share->options &
5356 5357 5358 5359 5360 5361 5362 5363 5364 5365 5366 5367 5368 5369 5370
          (HA_OPTION_PACK_RECORD | HA_OPTION_COMPRESS_RECORD)))
      ft_buf= (SORT_FT_BUF *)my_malloc(sort_param->keyinfo->block_length +
                                       sizeof(SORT_FT_BUF), MYF(MY_WME));

    if (!ft_buf)
    {
      sort_param->key_write=sort_key_write;
      return sort_key_write(sort_param, a);
    }
    sort_info->ft_buf= ft_buf;
    goto word_init_ft_buf;              /* no need to duplicate the code */
  }
  get_key_full_length_rdonly(val_off, ft_buf->lastkey);

  if (ha_compare_text(sort_param->seg->charset,
5371
                      a+1,a_len-1,
unknown's avatar
unknown committed
5372
                      (uchar*) ft_buf->lastkey+1,val_off-1, 0, 0)==0)
5373
  {
unknown's avatar
unknown committed
5374
    uchar *p;
5375 5376 5377 5378
    if (!ft_buf->buf)                   /* store in second-level tree */
    {
      ft_buf->count++;
      return sort_insert_key(sort_param,key_block,
unknown's avatar
unknown committed
5379
                             a + a_len, HA_OFFSET_ERROR);
5380 5381 5382
    }

    /* storing the key in the buffer. */
5383
    memcpy (ft_buf->buf, (const char *)a+a_len, val_len);
5384 5385 5386 5387 5388 5389 5390 5391 5392 5393
    ft_buf->buf+=val_len;
    if (ft_buf->buf < ft_buf->end)
      return 0;

    /* converting to two-level tree */
    p=ft_buf->lastkey+val_off;

    while (key_block->inited)
      key_block++;
    sort_info->key_block=key_block;
5394
    sort_param->keyinfo= &share->ft2_keyinfo;
5395 5396 5397 5398 5399 5400 5401 5402 5403 5404 5405 5406 5407 5408 5409 5410 5411 5412 5413 5414 5415 5416 5417 5418 5419 5420 5421 5422 5423 5424
    ft_buf->count=(ft_buf->buf - p)/val_len;

    /* flushing buffer to second-level tree */
    for (error=0; !error && p < ft_buf->buf; p+= val_len)
      error=sort_insert_key(sort_param,key_block,p,HA_OFFSET_ERROR);
    ft_buf->buf=0;
    return error;
  }

  /* flushing buffer */
  if ((error=_ma_sort_ft_buf_flush(sort_param)))
    return error;

word_init_ft_buf:
  a_len+=val_len;
  memcpy(ft_buf->lastkey, a, a_len);
  ft_buf->buf=ft_buf->lastkey+a_len;
  /*
    32 is just a safety margin here
    (at least max(val_len, sizeof(nod_flag)) should be there).
    May be better performance could be achieved if we'd put
      (sort_info->keyinfo->block_length-32)/XXX
      instead.
        TODO: benchmark the best value for XXX.
  */
  ft_buf->end= ft_buf->lastkey+ (sort_param->keyinfo->block_length-32);
  return 0;
} /* sort_maria_ft_key_write */


5425
/* get pointer to record from a key */
5426

5427 5428
static my_off_t get_record_for_key(MARIA_KEYDEF *keyinfo,
				   const uchar *key_data)
5429
{
5430 5431 5432 5433 5434
  MARIA_KEY key;
  key.keyinfo= keyinfo;
  key.data= (uchar*) key_data;
  key.data_length= _ma_keylength(keyinfo, key_data);
  return _ma_row_pos_from_key(&key);
5435 5436 5437 5438 5439 5440
} /* get_record_for_key */


	/* Insert a key in sort-key-blocks */

static int sort_insert_key(MARIA_SORT_PARAM *sort_param,
unknown's avatar
unknown committed
5441
			   register SORT_KEY_BLOCKS *key_block,
unknown's avatar
unknown committed
5442
                           const uchar *key,
5443 5444 5445 5446
			   my_off_t prev_block)
{
  uint a_length,t_length,nod_flag;
  my_off_t filepos,key_file_length;
unknown's avatar
unknown committed
5447
  uchar *anc_buff,*lastkey;
5448 5449 5450 5451
  MARIA_KEY_PARAM s_temp;
  MARIA_KEYDEF *keyinfo=sort_param->keyinfo;
  MARIA_SORT_INFO *sort_info= sort_param->sort_info;
  HA_CHECK *param=sort_info->param;
5452
  MARIA_PINNED_PAGE tmp_page_link, *page_link= &tmp_page_link;
5453
  MARIA_KEY tmp_key;
5454 5455
  MARIA_HA *info= sort_info->info;
  MARIA_SHARE *share= info->s;
5456 5457
  DBUG_ENTER("sort_insert_key");

unknown's avatar
unknown committed
5458
  anc_buff= key_block->buff;
5459 5460
  lastkey=key_block->lastkey;
  nod_flag= (key_block == sort_info->key_block ? 0 :
5461
	     share->base.key_reflength);
5462 5463 5464 5465 5466 5467 5468 5469 5470

  if (!key_block->inited)
  {
    key_block->inited=1;
    if (key_block == sort_info->key_block_end)
    {
      _ma_check_print_error(param,"To many key-block-levels; Try increasing sort_key_blocks");
      DBUG_RETURN(1);
    }
5471 5472 5473 5474 5475
    a_length= share->keypage_header + nod_flag;
    key_block->end_pos= anc_buff + share->keypage_header;
    bzero(anc_buff, share->keypage_header);
    _ma_store_keynr(share, anc_buff, (uint) (sort_param->keyinfo -
                                            share->keyinfo));
5476 5477 5478
    lastkey=0;					/* No previous key in block */
  }
  else
5479
    a_length= _ma_get_page_used(share, anc_buff);
5480 5481 5482

	/* Save pointer to previous block */
  if (nod_flag)
5483
  {
5484
    _ma_store_keypage_flag(share, anc_buff, KEYPAGE_FLAG_ISNOD);
5485
    _ma_kpointer(info,key_block->end_pos,prev_block);
5486
  }
5487

5488 5489 5490 5491 5492 5493 5494
  tmp_key.keyinfo= keyinfo;
  tmp_key.data= (uchar*) key;
  tmp_key.data_length= _ma_keylength(keyinfo, key) - share->base.rec_reflength;
  tmp_key.ref_length=  share->base.rec_reflength;

  t_length= (*keyinfo->pack_key)(&tmp_key, nod_flag,
                                 (uchar*) 0, lastkey, lastkey, &s_temp);
5495 5496
  (*keyinfo->store_key)(keyinfo, key_block->end_pos+nod_flag,&s_temp);
  a_length+=t_length;
5497
  _ma_store_page_used(share, anc_buff, a_length);
5498
  key_block->end_pos+=t_length;
unknown's avatar
unknown committed
5499
  if (a_length <= (uint) (keyinfo->block_length - KEYPAGE_CHECKSUM_SIZE))
5500
  {
5501 5502 5503
    MARIA_KEY tmp_key2;
    tmp_key2.data= key_block->lastkey;
    _ma_copy_key(&tmp_key2, &tmp_key);
5504 5505 5506 5507
    key_block->last_length=a_length-t_length;
    DBUG_RETURN(0);
  }

5508
  /* Fill block with end-zero and write filled block */
5509
  _ma_store_page_used(share, anc_buff, key_block->last_length);
unknown's avatar
unknown committed
5510
  bzero(anc_buff+key_block->last_length,
5511
	keyinfo->block_length- key_block->last_length);
5512
  key_file_length=share->state.state.key_file_length;
5513
  if ((filepos= _ma_new(info, DFLT_INIT_HITS, &page_link)) == HA_OFFSET_ERROR)
5514
    DBUG_RETURN(1);
5515
  _ma_fast_unlock_key_del(info);
5516 5517

  /* If we read the page from the key cache, we have to write it back to it */
5518
  if (page_link->changed)
5519
  {
5520 5521 5522 5523
    pop_dynamic(&info->pinned_pages);
    if (_ma_write_keypage(info, keyinfo, filepos,
                          PAGECACHE_LOCK_WRITE_UNLOCK,
                          DFLT_INIT_HITS, anc_buff))
5524 5525
      DBUG_RETURN(1);
  }
unknown's avatar
unknown committed
5526 5527 5528 5529 5530 5531 5532
  else
  {
    put_crc(anc_buff, filepos, share);
    if (my_pwrite(share->kfile.file, anc_buff,
                  (uint) keyinfo->block_length, filepos, param->myf_rw))
      DBUG_RETURN(1);
  }
5533
  DBUG_DUMP("buff", anc_buff, _ma_get_page_used(share, anc_buff));
5534 5535 5536 5537 5538 5539 5540 5541 5542 5543 5544

	/* Write separator-key to block in next level */
  if (sort_insert_key(sort_param,key_block+1,key_block->lastkey,filepos))
    DBUG_RETURN(1);

	/* clear old block and write new key in it */
  key_block->inited=0;
  DBUG_RETURN(sort_insert_key(sort_param, key_block,key,prev_block));
} /* sort_insert_key */


5545
/* Delete record when we found a duplicated key */
5546 5547 5548 5549 5550

static int sort_delete_record(MARIA_SORT_PARAM *sort_param)
{
  uint i;
  int old_file,error;
unknown's avatar
unknown committed
5551
  uchar *key;
5552 5553
  MARIA_SORT_INFO *sort_info=sort_param->sort_info;
  HA_CHECK *param=sort_info->param;
5554
  MARIA_HA *row_info= sort_info->new_info, *key_info= sort_info->info;
5555 5556 5557 5558 5559
  DBUG_ENTER("sort_delete_record");

  if ((param->testflag & (T_FORCE_UNIQUENESS|T_QUICK)) == T_QUICK)
  {
    _ma_check_print_error(param,
5560 5561
			 "Quick-recover aborted; Run recovery without switch -q or with "
                          "switch -qq");
5562 5563
    DBUG_RETURN(1);
  }
5564
  if (key_info->s->options & HA_OPTION_COMPRESS_RECORD)
5565 5566
  {
    _ma_check_print_error(param,
5567 5568 5569
                          "Recover aborted; Can't run standard recovery on "
                          "compressed tables with errors in data-file. "
                          "Use 'maria_chk --safe-recover' to fix it");
5570 5571 5572
    DBUG_RETURN(1);
  }

5573 5574 5575
  old_file= row_info->dfile.file;
  /* This only affects static and dynamic row formats */
  row_info->dfile.file= row_info->rec_cache.file;
unknown's avatar
unknown committed
5576 5577 5578
  if (flush_io_cache(&row_info->rec_cache))
    DBUG_RETURN(1);

5579
  key= key_info->lastkey_buff + key_info->s->base.max_key_length;
unknown's avatar
unknown committed
5580 5581
  if ((error=(*row_info->s->read_record)(row_info, sort_param->record,
                                         key_info->cur_row.lastpos)) &&
5582
	error != HA_ERR_RECORD_DELETED)
unknown's avatar
unknown committed
5583 5584 5585 5586 5587 5588 5589 5590 5591
  {
    _ma_check_print_error(param,"Can't read record to be removed");
    row_info->dfile.file= old_file;
    DBUG_RETURN(1);
  }
  row_info->cur_row.lastpos= key_info->cur_row.lastpos;

  for (i=0 ; i < sort_info->current_key ; i++)
  {
5592 5593 5594 5595 5596
    MARIA_KEY tmp_key;
    (*key_info->s->keyinfo[i].make_key)(key_info, &tmp_key, i, key,
                                        sort_param->record,
                                        key_info->cur_row.lastpos, 0);
    if (_ma_ck_delete(key_info, &tmp_key))
5597
    {
unknown's avatar
unknown committed
5598 5599 5600
      _ma_check_print_error(param,
                            "Can't delete key %d from record to be removed",
                            i+1);
5601
      row_info->dfile.file= old_file;
5602 5603 5604
      DBUG_RETURN(1);
    }
  }
unknown's avatar
unknown committed
5605 5606 5607 5608 5609 5610 5611
  if (sort_param->calc_checksum)
    param->glob_crc-=(*key_info->s->calc_check_checksum)(key_info,
                                                         sort_param->record);
  error= (*row_info->s->delete_record)(row_info, sort_param->record);
  if (error)
    _ma_check_print_error(param,"Got error %d when deleting record",
                          my_errno);
5612
  row_info->dfile.file= old_file;           /* restore actual value */
5613
  row_info->s->state.state.records--;
5614 5615 5616
  DBUG_RETURN(error);
} /* sort_delete_record */

5617 5618

/* Fix all pending blocks and flush everything to disk */
5619 5620 5621 5622 5623 5624 5625 5626 5627 5628

int _ma_flush_pending_blocks(MARIA_SORT_PARAM *sort_param)
{
  uint nod_flag,length;
  my_off_t filepos,key_file_length;
  SORT_KEY_BLOCKS *key_block;
  MARIA_SORT_INFO *sort_info= sort_param->sort_info;
  myf myf_rw=sort_info->param->myf_rw;
  MARIA_HA *info=sort_info->info;
  MARIA_KEYDEF *keyinfo=sort_param->keyinfo;
5629
  MARIA_PINNED_PAGE tmp_page_link, *page_link= &tmp_page_link;
5630 5631 5632 5633 5634 5635 5636
  DBUG_ENTER("_ma_flush_pending_blocks");

  filepos= HA_OFFSET_ERROR;			/* if empty file */
  nod_flag=0;
  for (key_block=sort_info->key_block ; key_block->inited ; key_block++)
  {
    key_block->inited=0;
5637
    length= _ma_get_page_used(info->s, key_block->buff);
5638 5639
    if (nod_flag)
      _ma_kpointer(info,key_block->end_pos,filepos);
5640
    key_file_length= info->s->state.state.key_file_length;
unknown's avatar
unknown committed
5641
    bzero(key_block->buff+length, keyinfo->block_length-length);
5642 5643
    if ((filepos= _ma_new(info, DFLT_INIT_HITS, &page_link)) ==
        HA_OFFSET_ERROR)
5644
      goto err;
5645 5646

    /* If we read the page from the key cache, we have to write it back */
5647
    if (page_link->changed)
5648
    {
5649
      pop_dynamic(&info->pinned_pages);
5650
      if (_ma_write_keypage(info, keyinfo, filepos,
5651
                            PAGECACHE_LOCK_WRITE_UNLOCK,
5652
                            DFLT_INIT_HITS, key_block->buff))
5653
	goto err;
5654
    }
unknown's avatar
unknown committed
5655 5656 5657 5658 5659
    else
    {
      put_crc(key_block->buff, filepos, info->s);
      if (my_pwrite(info->s->kfile.file, key_block->buff,
                    (uint) keyinfo->block_length,filepos, myf_rw))
5660
        goto err;
unknown's avatar
unknown committed
5661
    }
unknown's avatar
unknown committed
5662
    DBUG_DUMP("buff",key_block->buff,length);
5663 5664 5665
    nod_flag=1;
  }
  info->s->state.key_root[sort_param->key]=filepos; /* Last is root for tree */
5666
  _ma_fast_unlock_key_del(info);
5667
  DBUG_RETURN(0);
5668 5669 5670 5671

err:
  _ma_fast_unlock_key_del(info);
  DBUG_RETURN(1);
5672 5673 5674 5675 5676 5677 5678 5679 5680 5681 5682
} /* _ma_flush_pending_blocks */

	/* alloc space and pointers for key_blocks */

/*
  Allocate the key block descriptors and their page buffers

  SYNOPSIS
    alloc_key_blocks()
      param          Check/repair parameters (used for error reporting)
      blocks         Number of key blocks (index levels) to allocate
      buffer_length  Size of one page buffer; IO_SIZE extra bytes are added
                     to each buffer

  DESCRIPTION
    One memory area is allocated: first 'blocks' SORT_KEY_BLOCKS
    descriptors, then 'blocks' buffers. Every descriptor is marked as not
    inited and pointed at its own buffer.

  RETURN
    Pointer to the first descriptor (also the start of the allocation)
    0      out of memory; an error has been printed
*/

static SORT_KEY_BLOCKS *alloc_key_blocks(HA_CHECK *param, uint blocks,
                                         uint buffer_length)
{
  uint i;
  size_t buff_size= (size_t) buffer_length + IO_SIZE;
  SORT_KEY_BLOCKS *block;
  DBUG_ENTER("alloc_key_blocks");

  if (!(block= (SORT_KEY_BLOCKS*) my_malloc((sizeof(SORT_KEY_BLOCKS)+
                                             buff_size)*blocks,
                                            MYF(0))))
  {
    _ma_check_print_error(param,"Not enough memory for sort-key-blocks");
    /* DBUG_RETURN, not return: DBUG_ENTER above must be balanced */
    DBUG_RETURN(0);
  }
  for (i=0 ; i < blocks ; i++)
  {
    block[i].inited=0;
    /* Buffers start right after the last descriptor */
    block[i].buff= (uchar*) (block+blocks) + buff_size*i;
  }
  DBUG_RETURN(block);
} /* alloc_key_blocks */


	/* Check if file is almost full */

/*
  Check if the index file or the data file is almost full

  SYNOPSIS
    maria_test_if_almost_full()
      info         Open table

  DESCRIPTION
    A file counts as almost full when 9/10 of its current end position
    (as returned by my_seek(MY_SEEK_END)) is bigger than the maximum length
    stored in share->base. The data file is only examined if the index file
    is not almost full. Compressed tables are never reported as full.

  RETURN
    0      not almost full (or compressed table)
    1      index file or data file is almost full
*/

int maria_test_if_almost_full(MARIA_HA *info)
{
  MARIA_SHARE *share= info->s;
  my_off_t index_end, data_end;

  if (share->options & HA_OPTION_COMPRESS_RECORD)
    return 0;

  index_end= my_seek(share->kfile.file, 0L, MY_SEEK_END, MYF(MY_THREADSAFE));
  if (index_end / 10 * 9 > (my_off_t) share->base.max_key_file_length)
    return 1;

  data_end= my_seek(info->dfile.file, 0L, MY_SEEK_END, MYF(0));
  return data_end / 10 * 9 > (my_off_t) share->base.max_data_file_length;
}

5714 5715

/* Recreate table with bigger more alloced record-data */
5716 5717 5718 5719 5720 5721 5722 5723

int maria_recreate_table(HA_CHECK *param, MARIA_HA **org_info, char *filename)
{
  int error;
  MARIA_HA info;
  MARIA_SHARE share;
  MARIA_KEYDEF *keyinfo,*key,*key_end;
  HA_KEYSEG *keysegs,*keyseg;
5724
  MARIA_COLUMNDEF *columndef,*column,*end;
5725 5726 5727 5728 5729 5730
  MARIA_UNIQUEDEF *uniquedef,*u_ptr,*u_end;
  MARIA_STATUS_INFO status_info;
  uint unpack,key_parts;
  ha_rows max_records;
  ulonglong file_length,tmp_length;
  MARIA_CREATE_INFO create_info;
5731
  DBUG_ENTER("maria_recreate_table");
5732 5733 5734 5735 5736 5737

  error=1;					/* Default error */
  info= **org_info;
  status_info= (*org_info)->state[0];
  info.state= &status_info;
  share= *(*org_info)->s;
5738 5739
  unpack= ((share.data_file_type == COMPRESSED_RECORD) &&
           (param->testflag & T_UNPACK));
unknown's avatar
unknown committed
5740 5741
  if (!(keyinfo=(MARIA_KEYDEF*) my_alloca(sizeof(MARIA_KEYDEF) *
                                          share.base.keys)))
5742
    DBUG_RETURN(0);
unknown's avatar
unknown committed
5743
  memcpy((uchar*) keyinfo,(uchar*) share.keyinfo,
5744 5745 5746 5747 5748 5749
	 (size_t) (sizeof(MARIA_KEYDEF)*share.base.keys));

  key_parts= share.base.all_key_parts;
  if (!(keysegs=(HA_KEYSEG*) my_alloca(sizeof(HA_KEYSEG)*
				       (key_parts+share.base.keys))))
  {
unknown's avatar
unknown committed
5750
    my_afree((uchar*) keyinfo);
5751
    DBUG_RETURN(1);
5752
  }
5753
  if (!(columndef=(MARIA_COLUMNDEF*)
5754 5755
	my_alloca(sizeof(MARIA_COLUMNDEF)*(share.base.fields+1))))
  {
unknown's avatar
unknown committed
5756 5757
    my_afree((uchar*) keyinfo);
    my_afree((uchar*) keysegs);
5758
    DBUG_RETURN(1);
5759 5760 5761 5762
  }
  if (!(uniquedef=(MARIA_UNIQUEDEF*)
	my_alloca(sizeof(MARIA_UNIQUEDEF)*(share.state.header.uniques+1))))
  {
unknown's avatar
unknown committed
5763 5764 5765
    my_afree((uchar*) columndef);
    my_afree((uchar*) keyinfo);
    my_afree((uchar*) keysegs);
5766
    DBUG_RETURN(1);
5767 5768
  }

5769 5770
  /* Copy the column definitions in their original order */
  for (column= share.columndef, end= share.columndef+share.base.fields;
5771 5772
       column != end ;
       column++)
5773
    columndef[column->column_nr]= *column;
5774 5775

  /* Change the new key to point at the saved key segments */
unknown's avatar
unknown committed
5776
  memcpy((uchar*) keysegs,(uchar*) share.keyparts,
5777 5778 5779 5780 5781 5782 5783 5784 5785 5786 5787 5788 5789 5790
	 (size_t) (sizeof(HA_KEYSEG)*(key_parts+share.base.keys+
				      share.state.header.uniques)));
  keyseg=keysegs;
  for (key=keyinfo,key_end=keyinfo+share.base.keys; key != key_end ; key++)
  {
    key->seg=keyseg;
    for (; keyseg->type ; keyseg++)
    {
      if (param->language)
	keyseg->language=param->language;	/* change language */
    }
    keyseg++;					/* Skip end pointer */
  }

unknown's avatar
unknown committed
5791 5792 5793 5794
  /*
    Copy the unique definitions and change them to point at the new key
    segments
  */
unknown's avatar
unknown committed
5795
  memcpy((uchar*) uniquedef,(uchar*) share.uniqueinfo,
5796 5797 5798 5799 5800 5801 5802
	 (size_t) (sizeof(MARIA_UNIQUEDEF)*(share.state.header.uniques)));
  for (u_ptr=uniquedef,u_end=uniquedef+share.state.header.uniques;
       u_ptr != u_end ; u_ptr++)
  {
    u_ptr->seg=keyseg;
    keyseg+=u_ptr->keysegs+1;
  }
5803 5804

  file_length=(ulonglong) my_seek(info.dfile.file, 0L, MY_SEEK_END, MYF(0));
5805 5806 5807
  if (share.options & HA_OPTION_COMPRESS_RECORD)
    share.base.records=max_records=info.state->records;
  else if (share.base.min_pack_length)
5808
    max_records=(ha_rows) (file_length / share.base.min_pack_length);
5809 5810 5811 5812 5813 5814 5815 5816 5817 5818
  else
    max_records=0;
  share.options&= ~HA_OPTION_TEMP_COMPRESS_RECORD;

  tmp_length= file_length+file_length/10;
  set_if_bigger(file_length,param->max_data_file_length);
  set_if_bigger(file_length,tmp_length);
  set_if_bigger(file_length,(ulonglong) share.base.max_data_file_length);

  VOID(maria_close(*org_info));
5819

5820 5821 5822 5823 5824 5825 5826 5827 5828 5829 5830
  bzero((char*) &create_info,sizeof(create_info));
  create_info.max_rows=max(max_records,share.base.records);
  create_info.reloc_rows=share.base.reloc;
  create_info.old_options=(share.options |
			   (unpack ? HA_OPTION_TEMP_COMPRESS_RECORD : 0));

  create_info.data_file_length=file_length;
  create_info.auto_increment=share.state.auto_increment;
  create_info.language = (param->language ? param->language :
			  share.state.header.language);
  create_info.key_file_length=  status_info.key_file_length;
unknown's avatar
unknown committed
5831 5832 5833
  create_info.org_data_file_type= ((enum data_file_type)
                                   share.state.header.org_data_file_type);

5834 5835 5836 5837 5838
  /*
    Allow for creating an auto_increment key. This has an effect only if
    an auto_increment key exists in the original table.
  */
  create_info.with_auto_increment= TRUE;
unknown's avatar
unknown committed
5839
  create_info.null_bytes= share.base.null_bytes;
5840 5841
  create_info.transactional= share.base.born_transactional;

unknown's avatar
unknown committed
5842 5843 5844 5845 5846 5847
  /*
    We don't have to handle symlinks here because we are using
    HA_DONT_TOUCH_DATA
  */
  if (maria_create(filename, share.data_file_type,
                   share.base.keys - share.state.header.uniques,
5848
                   keyinfo, share.base.fields, columndef,
unknown's avatar
unknown committed
5849 5850 5851 5852 5853 5854 5855
                   share.state.header.uniques, uniquedef,
                   &create_info,
                   HA_DONT_TOUCH_DATA))
  {
    _ma_check_print_error(param,
                          "Got error %d when trying to recreate indexfile",
                          my_errno);
5856 5857
    goto end;
  }
5858 5859 5860 5861 5862 5863 5864
  *org_info= maria_open(filename,O_RDWR,
                        (HA_OPEN_FOR_REPAIR |
                         ((param->testflag & T_WAIT_FOREVER) ?
                          HA_OPEN_WAIT_IF_LOCKED :
                          (param->testflag & T_DESCRIPT) ?
                          HA_OPEN_IGNORE_IF_LOCKED :
                          HA_OPEN_ABORT_IF_LOCKED)));
5865 5866
  if (!*org_info)
  {
unknown's avatar
unknown committed
5867 5868 5869
    _ma_check_print_error(param,
                          "Got error %d when trying to open re-created indexfile",
                          my_errno);
5870 5871 5872 5873 5874
    goto end;
  }
  /* We are modifing */
  (*org_info)->s->options&= ~HA_OPTION_READ_ONLY_DATA;
  VOID(_ma_readinfo(*org_info,F_WRLCK,0));
5875
  (*org_info)->s->state.state.records= info.state->records;
5876 5877
  if (share.state.create_time)
    (*org_info)->s->state.create_time=share.state.create_time;
5878
#ifdef EXTERNAL_LOCKING
5879
  (*org_info)->s->state.unique= (*org_info)->this_unique= share.state.unique;
5880
#endif
5881 5882 5883 5884 5885 5886
  (*org_info)->s->state.state.checksum= info.state->checksum;
  (*org_info)->s->state.state.del= info.state->del;
  (*org_info)->s->state.dellink= share.state.dellink;
  (*org_info)->s->state.state.empty= info.state->empty;
  (*org_info)->s->state.state.data_file_length= info.state->data_file_length;
  *(*org_info)->state= (*org_info)->s->state.state;
5887
  if (maria_update_state_info(param,*org_info,UPDATE_TIME | UPDATE_STAT |
5888
                              UPDATE_OPEN_COUNT))
5889 5890 5891
    goto end;
  error=0;
end:
unknown's avatar
unknown committed
5892 5893 5894 5895
  my_afree((uchar*) uniquedef);
  my_afree((uchar*) keyinfo);
  my_afree((uchar*) columndef);
  my_afree((uchar*) keysegs);
5896
  DBUG_RETURN(error);
5897 5898 5899 5900 5901 5902 5903
}


	/* Write suffix to data file if needed */

/*
  Append the zero-filled margin to a compressed data file

  SYNOPSIS
    maria_write_data_suffix()
      sort_info     Sort state; the data is written to the record cache of
                    sort_info->new_info
      fix_datafile  Nothing is written unless this is set

  DESCRIPTION
    For tables with COMPRESSED_RECORD data files, MEMMAP_EXTRA_MARGIN zero
    bytes are written through the record cache and the end_of_file of
    param->read_cache is advanced by the same amount.

  RETURN
    0      ok (or nothing to do)
    1      write error; an error has been printed
*/

int maria_write_data_suffix(MARIA_SORT_INFO *sort_info, my_bool fix_datafile)
{
  MARIA_HA *new_table= sort_info->new_info;
  uchar padding[MEMMAP_EXTRA_MARGIN];

  if (new_table->s->data_file_type != COMPRESSED_RECORD || !fix_datafile)
    return 0;

  bzero(padding, sizeof(padding));
  if (my_b_write(&new_table->rec_cache, padding, sizeof(padding)))
  {
    _ma_check_print_error(sort_info->param,
                          "%d when writing to datafile",my_errno);
    return 1;
  }
  sort_info->param->read_cache.end_of_file+= sizeof(padding);
  return 0;
}

unknown's avatar
unknown committed
5921 5922

/* Update state and maria_chk time of indexfile */
5923 5924 5925

int maria_update_state_info(HA_CHECK *param, MARIA_HA *info,uint update)
{
5926
  MARIA_SHARE *share= info->s;
5927
  DBUG_ENTER("maria_update_state_info");
5928 5929 5930 5931 5932 5933 5934 5935 5936

  if (update & UPDATE_OPEN_COUNT)
  {
    share->state.open_count=0;
    share->global_changed=0;
  }
  if (update & UPDATE_STAT)
  {
    uint i, key_parts= mi_uint2korr(share->state.header.key_parts);
5937
    share->state.records_at_analyze= share->state.state.records;
5938
    share->state.changed&= ~STATE_NOT_ANALYZED;
5939
    if (share->state.state.records)
5940 5941 5942
    {
      for (i=0; i<key_parts; i++)
      {
5943
        if (!(share->state.rec_per_key_part[i]=param->new_rec_per_key_part[i]))
5944 5945 5946 5947 5948 5949 5950 5951 5952 5953
          share->state.changed|= STATE_NOT_ANALYZED;
      }
    }
  }
  if (update & (UPDATE_STAT | UPDATE_SORT | UPDATE_TIME | UPDATE_AUTO_INC))
  {
    if (update & UPDATE_TIME)
    {
      share->state.check_time= (long) time((time_t*) 0);
      if (!share->state.create_time)
5954
	share->state.create_time= share->state.check_time;
5955
    }
unknown's avatar
unknown committed
5956
    if (_ma_state_info_write(share, 1|2))
5957 5958 5959 5960 5961 5962 5963 5964 5965 5966 5967 5968
      goto err;
    share->changed=0;
  }
  {						/* Force update of status */
    int error;
    uint r_locks=share->r_locks,w_locks=share->w_locks;
    share->r_locks= share->w_locks= share->tot_locks= 0;
    error= _ma_writeinfo(info,WRITEINFO_NO_UNLOCK);
    share->r_locks=r_locks;
    share->w_locks=w_locks;
    share->tot_locks=r_locks+w_locks;
    if (!error)
5969
      DBUG_RETURN(0);
5970 5971 5972
  }
err:
  _ma_check_print_error(param,"%d when updating keyfile",my_errno);
5973
  DBUG_RETURN(1);
5974 5975
}

5976 5977 5978 5979 5980 5981 5982 5983 5984 5985 5986 5987
/*
  Update auto increment value for a table
  When setting the 'repair_only' flag we only want to change the
  old auto_increment value if its wrong (smaller than some given key).
  The reason is that we shouldn't change the auto_increment value
  for a table without good reason when only doing a repair; If the
  user have inserted and deleted rows, the auto_increment value
  may be bigger than the biggest current row and this is ok.

  If repair_only is not set, we will update the flag to the value in
  param->auto_increment is bigger than the biggest key.
*/
5988 5989 5990 5991

void _ma_update_auto_increment_key(HA_CHECK *param, MARIA_HA *info,
                                   my_bool repair_only)
{
5992
  MARIA_SHARE *share= info->s;
unknown's avatar
unknown committed
5993
  uchar *record;
5994 5995
  DBUG_ENTER("update_auto_increment_key");

5996 5997
  if (!share->base.auto_key ||
      ! maria_is_key_active(share->state.key_map, share->base.auto_key - 1))
5998 5999 6000 6001 6002
  {
    if (!(param->testflag & T_VERY_SILENT))
      _ma_check_print_info(param,
			  "Table: %s doesn't have an auto increment key\n",
			  param->isam_file_name);
6003
    DBUG_VOID_RETURN;
6004 6005 6006 6007 6008 6009 6010 6011
  }
  if (!(param->testflag & T_SILENT) &&
      !(param->testflag & T_REP))
    printf("Updating MARIA file: %s\n", param->isam_file_name);
  /*
    We have to use an allocated buffer instead of info->rec_buff as
    _ma_put_key_in_record() may use info->rec_buff
  */
6012 6013
  if (!(record= (uchar*) my_malloc((size_t) share->base.default_rec_buff_size,
                                   MYF(0))))
6014 6015
  {
    _ma_check_print_error(param,"Not enough memory for extra record");
6016
    DBUG_VOID_RETURN;
6017 6018 6019
  }

  maria_extra(info,HA_EXTRA_KEYREAD,0);
6020
  if (maria_rlast(info, record, share->base.auto_key-1))
6021 6022 6023 6024 6025 6026
  {
    if (my_errno != HA_ERR_END_OF_FILE)
    {
      maria_extra(info,HA_EXTRA_NO_KEYREAD,0);
      my_free((char*) record, MYF(0));
      _ma_check_print_error(param,"%d when reading last record",my_errno);
6027
      DBUG_VOID_RETURN;
6028 6029
    }
    if (!repair_only)
6030
      share->state.auto_increment=param->auto_increment_value;
6031 6032 6033
  }
  else
  {
6034 6035 6036
    const HA_KEYSEG *keyseg= share->keyinfo[share->base.auto_key-1].seg;
    ulonglong auto_increment=
      ma_retrieve_auto_increment(record + keyseg->start, keyseg->type);
6037
    set_if_bigger(share->state.auto_increment,auto_increment);
6038
    if (!repair_only)
6039
      set_if_bigger(share->state.auto_increment, param->auto_increment_value);
6040 6041 6042 6043
  }
  maria_extra(info,HA_EXTRA_NO_KEYREAD,0);
  my_free((char*) record, MYF(0));
  maria_update_state_info(param, info, UPDATE_AUTO_INC);
6044
  DBUG_VOID_RETURN;
6045 6046 6047 6048 6049 6050 6051 6052 6053 6054 6055 6056 6057 6058 6059 6060 6061 6062 6063 6064 6065 6066 6067 6068 6069 6070 6071 6072 6073 6074 6075 6076 6077 6078 6079 6080 6081 6082 6083 6084 6085 6086 6087 6088 6089 6090 6091 6092 6093 6094 6095 6096 6097 6098
}


/*
  Update statistics for each part of an index

  SYNOPSIS
    maria_update_key_parts()
      keyinfo           IN  Index information (only key->keysegs used)
      rec_per_key_part  OUT Store statistics here
      unique            IN  Array of (#distinct tuples)
      notnull_tuples    IN  Array of (#tuples), or NULL
      records               Number of records in the table

  DESCRIPTION
    This function is called produce index statistics values from unique and
    notnull_tuples arrays after these arrays were produced with sequential
    index scan (the scan is done in two places: chk_index() and
    sort_key_write()).

    This function handles all 3 index statistics collection methods.

    Unique is an array:
      unique[0]= (#different values of {keypart1}) - 1
      unique[1]= (#different values of {keypart1,keypart2} tuple)-unique[0]-1
      ...

    For MI_STATS_METHOD_IGNORE_NULLS method, notnull_tuples is an array too:
      notnull_tuples[0]= (#of {keypart1} tuples such that keypart1 is not NULL)
      notnull_tuples[1]= (#of {keypart1,keypart2} tuples such that all
                          keypart{i} are not NULL)
      ...
    For all other statistics collection methods notnull_tuples==NULL.

    Output is an array:
    rec_per_key_part[k] =
     = E(#records in the table such that keypart_1=c_1 AND ... AND
         keypart_k=c_k for arbitrary constants c_1 ... c_k)

     = {assuming that values have uniform distribution and index contains all
        tuples from the domain (or that {c_1, ..., c_k} tuple is choosen from
        index tuples}

     = #tuples-in-the-index / #distinct-tuples-in-the-index.

    The #tuples-in-the-index and #distinct-tuples-in-the-index have different
    meaning depending on which statistics collection method is used:

    MI_STATS_METHOD_*  how are nulls compared?  which tuples are counted?
     NULLS_EQUAL            NULL == NULL           all tuples in table
     NULLS_NOT_EQUAL        NULL != NULL           all tuples in table
     IGNORE_NULLS               n/a             tuples that don't have NULLs
*/

6099
void maria_update_key_parts(MARIA_KEYDEF *keyinfo, double *rec_per_key_part,
                            ulonglong *unique, ulonglong *notnull,
                            ulonglong records)
{
  ulonglong distinct_sum= 0;            /* unique[0] + ... + unique[part] */
  ulonglong row_count= records;         /* Tuples counted for this part */
  uint part;

  for (part= 0; part < keyinfo->keysegs; part++)
  {
    ulonglong distinct_tuples;
    double estimate;

    distinct_sum+= unique[part];
    distinct_tuples= distinct_sum + 1;
    if (notnull)
    {
      /*
        Only tuples without NULLs are counted:
        #(distinct tuples without NULLs) =
          #(distinct tuples, NULLs counted as different) - #(tuples with NULLs)
      */
      row_count= notnull[part];
      distinct_tuples-= records - row_count;
    }

    if (!distinct_tuples)
      estimate= 1;
    else if (!distinct_sum)
      estimate= ulonglong2double(row_count);    /* 1 unique tuple */
    else
      estimate= (ulonglong2double(row_count) /
                 ulonglong2double(distinct_tuples));

    /*
      For some weird keys (e.g. FULLTEXT) the estimate can be < 1 here;
      never store less than 1.
    */
    set_if_bigger(estimate, 1);

    rec_per_key_part[part]= estimate;
  }
}


unknown's avatar
unknown committed
6140
/*
  Compute a checksum over 'length' bytes starting at 'buf'.

  For every byte the running value is shifted left one bit, the byte is
  added, and test() of the previous value's top bit is added on top
  (presumably 0 or 1, i.e. the bit shifted out is fed back in; confirm
  against the test() macro definition).
*/

static ha_checksum maria_byte_checksum(const uchar *buf, uint length)
{
  const ha_checksum top_bit= ((ha_checksum) 1) << (8*sizeof(ha_checksum)-1);
  ha_checksum crc= 0;
  uint pos;

  for (pos= 0; pos < length; pos++)
  {
    const ha_checksum carry= test(crc & top_bit);
    crc= ((crc << 1) + buf[pos]) + carry;
  }
  return crc;
}

/*
  Return TRUE if the key should not be built by sorting.

  This is the case for every spatial key, and for binary-packed,
  variable-length or fulltext keys where rows * (max key length) is bigger
  than maria_max_temp_length.
*/

static my_bool maria_too_big_key_for_sort(MARIA_KEYDEF *key, ha_rows rows)
{
  uint max_key_length= key->maxlength;

  if (key->flag & HA_SPATIAL)
    return TRUE;
  if (!(key->flag & (HA_BINARY_PACK_KEY | HA_VAR_LENGTH_KEY | HA_FULLTEXT)))
    return FALSE;

  if (key->flag & HA_FULLTEXT)
  {
    /* Fulltext words are sorted with a different maximum word length */
    uint sort_word_length= (FT_MAX_WORD_LEN_FOR_SORT *
                            key->seg->charset->mbmaxlen);
    max_key_length+= sort_word_length - HA_FT_MAXBYTELEN;
  }
  return ((ulonglong) rows * max_key_length >
          (ulonglong) maria_max_temp_length);
}

/*
  Deactivate all non-unique indexes that can be recreated fast.
  These include packed keys on which sorting will use more temporary
  space than the max allowed file length or for which the unpacked keys
  will take much more space than packed keys.
  Note that 'rows' may be zero for the case when we don't know how many
  rows we will put into the file.
 */

void maria_disable_non_unique_index(MARIA_HA *info, ha_rows rows)
{
  MARIA_SHARE *share= info->s;
  uint key_nr;

  DBUG_ASSERT(share->state.state.records == 0 &&
              (!rows || rows >= MARIA_MIN_ROWS_TO_DISABLE_INDEXES));

  for (key_nr= 0; key_nr < share->base.keys; key_nr++)
  {
    MARIA_KEYDEF *keydef= share->keyinfo + key_nr;

    /* Unique, spatial, auto-increment and R-tree keys stay active */
    if (keydef->flag &
        (HA_NOSAME | HA_SPATIAL | HA_AUTO_KEY | HA_RTREE_INDEX))
      continue;
    /* So do keys that are too big to be rebuilt by sorting */
    if (maria_too_big_key_for_sort(keydef, rows))
      continue;
    /* base.auto_key is the 1-based number of the auto-increment key */
    if (share->base.auto_key == key_nr + 1)
      continue;

    maria_clear_key_active(share->state.key_map, key_nr);
    info->update|= HA_STATE_CHANGED;
  }
}


/*
  Return TRUE if we can use repair by sorting
  One can set the force argument to force to use sorting
  even if the temporary file would be quite big!
*/

my_bool maria_test_if_sort_rep(MARIA_HA *info, ha_rows rows,
6202
                               ulonglong key_map, my_bool force)
6203
{
6204
  MARIA_SHARE *share= info->s;
6205 6206 6207 6208 6209 6210 6211 6212 6213 6214 6215 6216 6217 6218 6219 6220 6221 6222
  MARIA_KEYDEF *key=share->keyinfo;
  uint i;

  /*
    maria_repair_by_sort only works if we have at least one key. If we don't
    have any keys, we should use the normal repair.
  */
  if (! maria_is_any_key_active(key_map))
    return FALSE;				/* Can't use sort */
  for (i=0 ; i < share->base.keys ; i++,key++)
  {
    if (!force && maria_too_big_key_for_sort(key,rows))
      return FALSE;
  }
  return TRUE;
}


6223 6224 6225 6226 6227 6228 6229 6230
/**
   @brief Create a new handle for manipulating the new record file

   @note
   It's ok for Recovery to have two MARIA_SHARE on the same index file
   because the one we create here is not transactional
*/

unknown's avatar
unknown committed
6231
static my_bool create_new_data_handle(MARIA_SORT_PARAM *param, File new_file)
{
  MARIA_SORT_INFO *sort_info= param->sort_info;
  MARIA_HA *info= sort_info->info;
  MARIA_SHARE *org_share= info->s;
  MARIA_HA *new_info;
  MARIA_SHARE *new_share;
  DBUG_ENTER("create_new_data_handle");

  /* Open a second handler on the same table; it is kept in sort_info */
  new_info= maria_open(org_share->open_file_name.str, O_RDWR,
                       HA_OPEN_COPY | HA_OPEN_FOR_REPAIR);
  sort_info->new_info= new_info;
  if (!new_info)
    DBUG_RETURN(1);
  new_share= new_info->s;

  _ma_bitmap_set_pagecache_callbacks(&new_share->bitmap.file, new_share);
  _ma_set_data_pagecache_callbacks(&new_info->dfile, new_share);

  /* Let the new handler work on new_file instead of the file it opened */
  change_data_file_descriptor(new_info, new_file);
  maria_lock_database(new_info, F_EXTRA_LCK);

  if ((sort_info->param->testflag & T_UNPACK) &&
      org_share->data_file_type == COMPRESSED_RECORD)
  {
    /*
      Unpacking a compressed table: shut down the compressed-row handling,
      switch to the original row format and initialize that instead.
    */
    (*new_share->once_end)(new_share);
    (*new_share->end)(new_info);
    restore_data_file_type(new_share);
    _ma_setup_functions(new_share);
    if ((*new_share->once_init)(new_share, new_file) ||
        (*new_share->init)(new_info))
      DBUG_RETURN(1);
  }

  _ma_reset_status(new_info);
  if (_ma_initialize_data_file(new_share, new_file))
    DBUG_RETURN(1);

  /* Take into account any bitmap page created above: */
  param->filepos= new_share->state.state.data_file_length;

  /* Use new virtual functions for key generation */
  org_share->keypos_to_recpos= new_share->keypos_to_recpos;
  org_share->recpos_to_keypos= new_share->recpos_to_keypos;
  DBUG_RETURN(0);
}


6274 6275 6276 6277 6278 6279 6280
/*
  Decide which row format the repaired data file will use.

  The format is normally the table's current one. When a compressed table
  is repaired with T_UNPACK, the original (pre-compression) format is used
  and share->delete_record is replaced by the delete function that
  _ma_setup_functions() selects for a modified copy of the share.
*/

static void
set_data_file_type(MARIA_SORT_INFO *sort_info, MARIA_SHARE *share)
{
  MARIA_SHARE scratch;

  sort_info->new_data_file_type= share->data_file_type;
  if (sort_info->new_data_file_type != COMPRESSED_RECORD ||
      !(sort_info->param->testflag & T_UNPACK))
    return;

  sort_info->new_data_file_type= share->state.header.org_data_file_type;

  /* Set delete_function for sort_delete_record() */
  scratch= *share;
  scratch.state.header.data_file_type=
    scratch.state.header.org_data_file_type;
  /*
    NOTE(review): this assigns the complement of the flag (every other
    option bit becomes set) instead of clearing one bit with '&='.
    Kept as is; confirm against _ma_setup_functions() whether intended.
  */
  scratch.options= ~HA_OPTION_COMPRESS_RECORD;
  _ma_setup_functions(&scratch);
  share->delete_record= scratch.delete_record;
}
unknown's avatar
unknown committed
6290 6291 6292

/*
  Switch a share from compressed rows back to its original row format.

  Clears HA_OPTION_COMPRESS_RECORD (in memory and in the state header),
  restores the data file type from org_data_file_type, zeroes the pack
  header length and refreshes the key <-> record position converters.
*/

static void restore_data_file_type(MARIA_SHARE *share)
{
  MARIA_SHARE scratch;

  share->pack.header_length= 0;

  share->options&= ~HA_OPTION_COMPRESS_RECORD;
  mi_int2store(share->state.header.options, share->options);

  share->state.header.data_file_type= share->state.header.org_data_file_type;
  share->data_file_type= share->state.header.data_file_type;

  /*
    Use new virtual functions for key generation. The functions are set up
    on a copy, so only the two position converters change in the share.
  */
  scratch= *share;
  _ma_setup_functions(&scratch);
  share->keypos_to_recpos= scratch.keypos_to_recpos;
  share->recpos_to_keypos= scratch.recpos_to_keypos;
}
6307 6308


6309 6310
/*
  Close the handler's current data file and make both the handler and the
  share's bitmap use 'new_file' instead; the bitmap cache is reset.
*/

static void change_data_file_descriptor(MARIA_HA *info, File new_file)
{
  MARIA_SHARE *share= info->s;

  my_close(info->dfile.file, MYF(MY_WME));
  share->bitmap.file.file= new_file;
  info->dfile.file= new_file;
  _ma_bitmap_reset_cache(share);
}


/**
   @brief Mark the data file to not be used

   @note
   This is used in repair when we want to ensure the handler will not
   write anything to the data file anymore
*/

static void unuse_data_file_descriptor(MARIA_HA *info)
{
  MARIA_SHARE *share= info->s;

  /* -1 in both the bitmap's and the handler's descriptor, nothing closed */
  share->bitmap.file.file= -1;
  info->dfile.file= -1;
  _ma_bitmap_reset_cache(share);
}


/*
  Copy all states that has to do with the data file

  NOTES
    This is done to copy the state from the data file generated from
    repair to the original handler
*/

static void copy_data_file_state(MARIA_STATE_INFO *to,
                                 MARIA_STATE_INFO *from)
{
  /* Members of the nested 'state' struct */
  to->state.data_file_length= from->state.data_file_length;
  to->state.records= from->state.records;
  to->state.del= from->state.del;
  to->state.empty= from->state.empty;

  /* Members stored directly in MARIA_STATE_INFO */
  to->first_bitmap_with_space= from->first_bitmap_with_space;
  to->dellink= from->dellink;
  to->split= from->split;
}


/*
  Read 'safely' next record while scanning table.

  SYNOPSIS
    _ma_safe_scan_block_record()
    info                Maria handler
    record              Store found here

  NOTES
    - One must have called mi_scan() before this

    Differences compared to  _ma_scan_block_records() are:
    - We read all blocks, not only blocks marked by the bitmap to be safe
    - In case of errors, next read will read next record.
    - More sanity checks

  RETURN
    0   ok
    HA_ERR_END_OF_FILE  End of file
    #   error number
*/


static int _ma_safe_scan_block_record(MARIA_SORT_INFO *sort_info,
6377
                                      MARIA_HA *info, uchar *record)
6378
{
6379
  MARIA_SHARE *share= info->s;
6380 6381
  MARIA_RECORD_POS record_pos= info->cur_row.nextpos;
  pgcache_page_no_t page= sort_info->page;
6382 6383 6384 6385 6386 6387 6388 6389
  DBUG_ENTER("_ma_safe_scan_block_record");

  for (;;)
  {
    /* Find next row in current page */
    if (likely(record_pos < info->scan.number_of_rows))
    {
      uint length, offset;
6390
      uchar *data, *end_of_data;
6391 6392 6393 6394 6395 6396 6397 6398 6399
      char llbuff[22];

      while (!(offset= uint2korr(info->scan.dir)))
      {
        info->scan.dir-= DIR_ENTRY_SIZE;
        record_pos++;
        if (info->scan.dir < info->scan.dir_end)
        {
          _ma_check_print_info(sort_info->param,
unknown's avatar
unknown committed
6400
                               "Wrong directory on page %s",
6401 6402 6403 6404 6405 6406 6407 6408 6409 6410 6411
                               llstr(page, llbuff));
          goto read_next_page;
        }
      }
      /* found row */
      info->cur_row.lastpos= info->scan.row_base_page + record_pos;
      info->cur_row.nextpos= record_pos + 1;
      data= info->scan.page_buff + offset;
      length= uint2korr(info->scan.dir + 2);
      end_of_data= data + length;
      info->scan.dir-= DIR_ENTRY_SIZE;          /* Point to previous row */
6412

6413
      if (end_of_data > info->scan.dir_end ||
6414
          offset < PAGE_HEADER_SIZE || length < share->base.min_block_length)
6415 6416 6417
      {
        _ma_check_print_info(sort_info->param,
                             "Wrong directory entry %3u at page %s",
6418
                             (uint) record_pos, llstr(page, llbuff));
6419 6420 6421 6422 6423 6424 6425 6426 6427 6428 6429 6430 6431 6432 6433 6434 6435 6436 6437
        record_pos++;
        continue;
      }
      else
      {
        DBUG_PRINT("info", ("rowid: %lu", (ulong) info->cur_row.lastpos));
        DBUG_RETURN(_ma_read_block_record2(info, record, data, end_of_data));
      }
    }

read_next_page:
    /* Read until we find next head page */
    for (;;)
    {
      uint page_type;
      char llbuff[22];

      sort_info->page++;                        /* In case of errors */
      page++;
6438
      if (!(page % share->bitmap.pages_covered))
6439 6440 6441 6442 6443
      {
        /* Skip bitmap */
        page++;
        sort_info->page++;
      }
6444
      if ((my_off_t) (page + 1) * share->block_size > sort_info->filelength)
6445
        DBUG_RETURN(HA_ERR_END_OF_FILE);
6446
      if (!(pagecache_read(share->pagecache,
6447 6448 6449 6450
                           &info->dfile,
                           page, 0, info->scan.page_buff,
                           PAGECACHE_READ_UNKNOWN_PAGE,
                           PAGECACHE_LOCK_LEFT_UNLOCKED, 0)))
6451 6452 6453 6454
      {
        if (my_errno == HA_ERR_WRONG_CRC)
        {
          _ma_check_print_info(sort_info->param,
unknown's avatar
unknown committed
6455 6456
                               "Wrong CRC on datapage at %s",
                               llstr(page, llbuff));
6457 6458
          continue;
        }
6459
        DBUG_RETURN(my_errno);
6460
      }
6461 6462 6463 6464 6465 6466 6467 6468
      page_type= (info->scan.page_buff[PAGE_TYPE_OFFSET] &
                  PAGE_TYPE_MASK);
      if (page_type == HEAD_PAGE)
      {
        if ((info->scan.number_of_rows=
             (uint) (uchar) info->scan.page_buff[DIR_COUNT_OFFSET]) != 0)
          break;
        _ma_check_print_info(sort_info->param,
unknown's avatar
unknown committed
6469 6470
                             "Wrong head page at page %s",
                             llstr(page, llbuff));
6471 6472 6473 6474
      }
      else if (page_type >= MAX_PAGE_TYPE)
      {
        _ma_check_print_info(sort_info->param,
unknown's avatar
unknown committed
6475 6476
                             "Found wrong page type: %d at page %s",
                             page_type, llstr(page, llbuff));
6477 6478 6479 6480
      }
    }

    /* New head page */
6481
    info->scan.dir= (info->scan.page_buff + share->block_size -
6482 6483 6484 6485 6486 6487 6488 6489
                     PAGE_SUFFIX_SIZE - DIR_ENTRY_SIZE);
    info->scan.dir_end= (info->scan.dir -
                         (info->scan.number_of_rows - 1) *
                         DIR_ENTRY_SIZE);
    info->scan.row_base_page= ma_recordpos(page, 0);
    record_pos= 0;
  }
}
6490 6491


6492 6493
/**
   @brief Writes a LOGREC_REPAIR_TABLE record and updates create_rename_lsn
unknown's avatar
unknown committed
6494
   if needed (so that maria_read_log does not redo the repair).
6495 6496 6497 6498 6499 6500 6501 6502 6503

   @param  param            description of the REPAIR operation
   @param  info             table

   @return Operation status
     @retval 0      ok
     @retval 1      error (disk problem)
*/

unknown's avatar
unknown committed
6504
my_bool write_log_record_for_repair(const HA_CHECK *param, MARIA_HA *info)
{
  MARIA_SHARE *share= info->s;
  /* in case this is maria_chk or recovery... */
  if (translog_status == TRANSLOG_OK && !maria_in_recovery &&
      share->base.born_transactional)
  {
    my_bool save_now_transactional= share->now_transactional;
    my_bool error;

    /*
      For now this record is only informative. It could serve when applying
      logs to a backup, but that needs more thought. Assume table became
      corrupted. It is repaired, then some writes happen to it.
      Later we restore an old backup, and want to apply this REDO_REPAIR_TABLE
      record. For it to give the same result as originally, the table should
      be corrupted the same way, so applying previous REDOs should produce the
      same corruption; that's really not guaranteed (different execution paths
      in execution of REDOs vs runtime code so not same bugs hit, temporary
      hardware issues not repeatable etc). Corruption may not be repeatable.
      A reasonable solution is to execute the REDO_REPAIR_TABLE record and
      check if the checksum of the resulting table matches what it was at the
      end of the original repair (should be stored in log record); or execute
      the REDO_REPAIR_TABLE if the checksum of the table-before-repair matches
      what it was at the start of the original repair (should be stored in log
      record).
    */
    LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 1];
    uchar log_data[FILEID_STORE_SIZE + 8 + 8];
    LSN lsn;

    /*
      testflag gives an idea of what REPAIR did (in particular T_QUICK
      or not: did it touch the data file or not?).
    */
    int8store(log_data + FILEID_STORE_SIZE, param->testflag);
    /* org_key_map is used when recreating index after a load data infile */
    int8store(log_data + FILEID_STORE_SIZE + 8, param->org_key_map);

    log_array[TRANSLOG_INTERNAL_PARTS + 0].str=    log_data;
    log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data);

    /*
      The table is forced to be "now transactional" while the record is
      written and the state is updated; the saved value is put back below
      on every path, including the error ones.
    */
    share->now_transactional= 1;
    error= (translog_write_record(&lsn, LOGREC_REDO_REPAIR_TABLE,
                                  &dummy_transaction_object, info,
                                  (translog_size_t) sizeof(log_data),
                                  sizeof(log_array)/sizeof(log_array[0]),
                                  log_array, log_data, NULL) ||
            translog_flush(lsn));
    /*
      The table's existence was made durable earlier (MY_SYNC_DIR passed to
      maria_change_to_newfile()). All pages have been flushed, state too, we
      need to force it to disk. Old REDOs should not be applied to the table,
      which is already enforced as skip_redos_lsn was increased in
      protect_against_repair_crash(). But if this is an explicit repair,
      even UNDO phase should ignore this table: create_rename_lsn should be
      increased, and this also serves for the REDO_REPAIR to be ignored by
      maria_read_log.
      The fully correct order would be: sync data and index file, remove crash
      mark and update LSNs then write state and sync index file. But at this
      point state (without crash mark) is already written.
    */
    if (!error)
      error= ((!(param->testflag & T_NO_CREATE_RENAME_LSN) &&
               _ma_update_state_lsns(share, lsn, share->state.create_trid,
                                     FALSE, FALSE)) ||
              _ma_sync_table_files(info));

    share->now_transactional= save_now_transactional;
    if (unlikely(error))
      return TRUE;
  }
  return FALSE;
}


6577 6578 6579 6580 6581 6582
/**
  Writes an UNDO record which if executed in UNDO phase, will empty the
  table. Such record is thus logged only in certain cases of bulk insert
  (table needs to be empty etc).
*/
my_bool write_log_record_for_bulk_insert(MARIA_HA *info)
{
  uchar log_data[LSN_STORE_SIZE + FILEID_STORE_SIZE];
  LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 1];
  LEX_CUSTRING *data_part= &log_array[TRANSLOG_INTERNAL_PARTS + 0];
  LSN lsn;

  /* The record starts with the transaction's current undo_lsn */
  lsn_store(log_data, info->trn->undo_lsn);
  data_part->str= log_data;
  data_part->length= sizeof(log_data);

  if (translog_write_record(&lsn, LOGREC_UNDO_BULK_INSERT,
                            info->trn, info,
                            (translog_size_t) data_part->length,
                            TRANSLOG_INTERNAL_PARTS + 1, log_array,
                            log_data + LSN_STORE_SIZE, NULL))
    return 1;

  /* WAL: the record must be flushed before we report success */
  return translog_flush(lsn) != 0;
}
6599 6600 6601 6602


/* Give error message why reading of key page failed */

unknown's avatar
unknown committed
6603 6604
static void report_keypage_fault(HA_CHECK *param, MARIA_HA *info,
                                 my_off_t position)
{
  /*
    22 bytes, the size used by every other llstr() buffer in this file:
    enough for any 64-bit value with sign and terminating zero. The
    page number printed here comes from a position that may itself be
    garbage (we are reporting a read failure), so it must not be assumed
    to fit in 10 digits.
  */
  char buff[22];
  uint32 block_size= info->s->block_size;

  if (my_errno == HA_ERR_CRASHED)
    _ma_check_print_error(param,
                          "Wrong base information on indexpage at page: %s",
                          llstr(position / block_size, buff));
  else
    _ma_check_print_error(param,
                          "Can't read indexpage from page: %s, "
                          "error: %d",
                          llstr(position / block_size, buff), my_errno);
}
6619 6620 6621 6622 6623 6624 6625 6626 6627 6628 6629 6630 6631 6632 6633 6634 6635 6636


/**
  When we want to check a table, we verify that the transaction ids of rows
  and keys are not bigger than the biggest id generated by Maria so far, which
  is returned by the function below.

  @note If control file is not open, 0 may be returned; to not confuse
  this with a valid max trid of 0, the caller should notice that it failed to
  open the control file (ma_control_file_inited() can serve for that).
*/

static TrID max_trid_in_system(void)
{
  /* 0 if the transaction manager is not initialized */
  TrID trnman_max= trnman_get_max_trid();

  /*
    The transaction manager's value may be far bigger than the one from the
    control file, if last shutdown is old; use the bigger of the two.
  */
  return max(trnman_max, max_trid_in_control_file);
}
unknown's avatar
unknown committed
6637 6638 6639 6640 6641 6642 6643 6644 6645 6646 6647 6648 6649 6650 6651 6652 6653 6654 6655 6656 6657 6658


/*
  Report a row whose transaction id is bigger than expected.

  Every call increments param->not_visible_rows_found, but only the first
  one prints anything: a warning if no control file was loaded, otherwise
  an error that also shows param->max_trid.
*/

static void _ma_check_print_not_visible_error(HA_CHECK *param, TrID used_trid)
{
  char trid_buff[22], max_trid_buff[22];

  if (param->not_visible_rows_found++)
    return;                                     /* Already reported */

  if (ma_control_file_inited())
  {
    _ma_check_print_error(param,
                          "Found row with transaction id %s when max transaction id according to maria_control_file is %s",
                          llstr(used_trid, trid_buff),
                          llstr(param->max_trid, max_trid_buff));
  }
  else
  {
    _ma_check_print_warning(param,
                            "Found row with transaction id %s but no maria_control_file was specified.  The table may be corrupted",
                            llstr(used_trid, trid_buff));
  }
}
6659 6660 6661 6662 6663 6664 6665 6666 6667 6668 6669 6670 6671 6672 6673 6674 6675 6676 6677


/**
  Mark that we can retry normal repair if we used quick repair

  We shouldn't do this in case of disk error as in this case we are likely
  to lose much more than expected.
*/

void retry_if_quick(MARIA_SORT_PARAM *sort_param, int error)
{
  HA_CHECK *param= sort_param->sort_info->param;

  /* Nothing to retry if the data file was already being rebuilt */
  if (sort_param->fix_datafile)
    return;
  /* Errors below HA_ERR_FIRST are not handler errors: no retry for those */
  if (error < HA_ERR_FIRST)
    return;

  param->retry_repair= 1;
  param->testflag|= T_RETRY_WITHOUT_QUICK;
}