/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */

/*
  Locking of Maria tables.
  Must be the first request before doing any further calls to any Maria
  function.
  Is used to allow many processes to use the same non-transactional Maria
  table.
*/

#include "ma_ftdefs.h"

	/* lock table by F_UNLCK, F_RDLCK or F_WRLCK */

/**
  @brief Take, convert or release this handler's lock on a Maria table.

  Maintains the per-share lock counters (r_locks, w_locks, tot_locks) and
  info->lock_type.  When a lock is released (F_UNLCK) it also ends the record
  cache and, for the last lock on a changed table, writes the table state
  back to the index file.

  @param info       Maria handler whose lock is changed
  @param lock_type  F_UNLCK, F_RDLCK, F_WRLCK or F_EXTRA_LCK

  @return 0 on success (also when the table is read-only or the handler
          already holds lock_type); otherwise the my_errno captured from the
          failing step, or HA_ERR_NO_SUCH_TABLE in the __WIN__ branch.

  @note The F_EXTRA_LCK path updates the counters and returns before
        share->intern_lock is taken; every other state change below happens
        under that mutex.
*/
int maria_lock_database(MARIA_HA *info, int lock_type)
{
  int error;
  uint count;                           /* Only assigned/used for F_UNLCK */
  MARIA_SHARE *share= info->s;
  DBUG_ENTER("maria_lock_database");
  DBUG_PRINT("enter",("lock_type: %d  old lock %d  r_locks: %u  w_locks: %u "
                      "global_changed:  %d  open_count: %u  name: '%s'",
                      lock_type, info->lock_type, share->r_locks,
                      share->w_locks,
                      share->global_changed, share->state.open_count,
                      share->index_file_name.str));
  /* Nothing to do for read-only data or when the lock type is unchanged */
  if (share->options & HA_OPTION_READ_ONLY_DATA ||
      info->lock_type == lock_type)
    DBUG_RETURN(0);
  if (lock_type == F_EXTRA_LCK)                 /* Used by TMP tables */
  {
    ++share->w_locks;
    ++share->tot_locks;
    info->lock_type= lock_type;
    DBUG_RETURN(0);
  }

  error=0;
  pthread_mutex_lock(&share->intern_lock);
  if (share->kfile.file >= 0)                   /* May only be false on windows */
  {
    switch (lock_type) {
    case F_UNLCK:
      maria_ftparser_call_deinitializer(info);
      /* 'count' is what remains of the kind of lock we are giving up */
      if (info->lock_type == F_RDLCK)
      {
        count= --share->r_locks;
        if (share->lock_restore_status)
          (*share->lock_restore_status)(info);
      }
      else
      {
        count= --share->w_locks;
        if (share->lock.update_status)
          (*share->lock.update_status)(info);
      }
      --share->tot_locks;
      if (info->lock_type == F_WRLCK && !share->w_locks)
      {
        /* pages of transactional tables get flushed at Checkpoint */
        if (!share->base.born_transactional && !share->temporary &&
            _ma_flush_table_files(info,
                                  share->delay_key_write ? MARIA_FLUSH_DATA :
                                  MARIA_FLUSH_DATA | MARIA_FLUSH_INDEX,
                                  FLUSH_KEEP, FLUSH_KEEP))
          error= my_errno;
      }
      if (info->opt_flag & (READ_CACHE_USED | WRITE_CACHE_USED))
      {
        if (end_io_cache(&info->rec_cache))
        {
          error=my_errno;
          maria_print_error(info->s, HA_ERR_CRASHED);
          maria_mark_crashed(info);
        }
      }
      if (!count)
      {
        DBUG_PRINT("info",("changed: %u  w_locks: %u",
                           (uint) share->changed, share->w_locks));
        if (share->changed && !share->w_locks)
        {
#ifdef HAVE_MMAP
          /*
            Remap the data file if it has changed size and enough inserts
            were done outside the mapping.
          */
          if ((share->mmaped_length !=
               share->state.state.data_file_length) &&
              (share->nonmmaped_inserts > MAX_NONMAPPED_INSERTS))
          {
            if (share->lock_key_trees)
              rw_wrlock(&share->mmap_lock);
            _ma_remap_file(info, share->state.state.data_file_length);
            share->nonmmaped_inserts= 0;
            if (share->lock_key_trees)
              rw_unlock(&share->mmap_lock);
          }
#endif
#ifdef EXTERNAL_LOCKING
          share->state.process= share->last_process=share->this_process;
          share->state.unique=   info->last_unique=  info->this_unique;
          share->state.update_count= info->last_loop= ++info->this_loop;
#endif
          /* transactional tables rather flush their state at Checkpoint */
          if (!share->base.born_transactional)
          {
            if (_ma_state_info_write_sub(share->kfile.file, &share->state,
                                         MA_STATE_INFO_WRITE_DONT_MOVE_OFFSET))
              error= my_errno;
            else
            {
              /* A value of 0 below means "state flushed" */
              share->changed= 0;
            }
          }
          if (maria_flush)
          {
            if (_ma_sync_table_files(info))
              error= my_errno;
          }
          else
            share->not_flushed=1;
          /* Any error collected above in this F_UNLCK case marks the table */
          if (error)
          {
            maria_print_error(info->s, HA_ERR_CRASHED);
            maria_mark_crashed(info);
          }
        }
      }
      info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED);
      info->lock_type= F_UNLCK;
      break;
    case F_RDLCK:
      if (info->lock_type == F_WRLCK)
      {
        /*
          Change RW to READONLY

          mysqld does not turn write locks to read locks,
          so we're never here in mysqld.
        */
        share->w_locks--;
        share->r_locks++;
        info->lock_type=lock_type;
        break;
      }
#ifdef MARIA_EXTERNAL_LOCKING
      if (!share->r_locks && !share->w_locks)
      {
        /* note that a transactional table should not do this */
        if (_ma_state_info_read_dsk(share->kfile.file, &share->state))
        {
          error=my_errno;
          break;
        }
      }
#endif
      VOID(_ma_test_if_changed(info));
      share->r_locks++;
      share->tot_locks++;
      info->lock_type=lock_type;
      break;
    case F_WRLCK:
      if (info->lock_type == F_RDLCK)
      {                                         /* Change READONLY to RW */
        if (share->r_locks == 1)
        {
          share->r_locks--;
          share->w_locks++;
          info->lock_type=lock_type;
          break;
        }
        /*
          NOTE(review): with r_locks != 1 we fall through to the generic
          path below, which increments w_locks and tot_locks without
          decrementing r_locks -- confirm this is intended.
        */
      }
#ifdef MARIA_EXTERNAL_LOCKING
      if (!(share->options & HA_OPTION_READ_ONLY_DATA))
      {
        if (!share->w_locks)
        {
          if (!share->r_locks)
          {
            /*
              Note that transactional tables should not do this.
              If we enabled this code, we should make sure to skip it if
              born_transactional is true. We should not test
              now_transactional to decide if we can call
              _ma_state_info_read_dsk(), because it can temporarily be 0
              (TRUNCATE on a partitioned table) and thus it would make a state
              modification below without mutex, confusing a concurrent
              checkpoint running.
              Even if this code was enabled only for non-transactional tables:
              in scenario LOCK TABLE t1 WRITE; INSERT INTO t1; DELETE FROM t1;
              state on disk read by DELETE is obsolete as it was not flushed
              at the end of INSERT. MyISAM same. It however causes no issue as
              maria_delete_all_rows() calls _ma_reset_status() thus is not
              influenced by the obsolete read values.
            */
            if (_ma_state_info_read_dsk(share->kfile.file, &share->state))
            {
              error=my_errno;
              break;
            }
          }
        }
      }
#endif /* defined(MARIA_EXTERNAL_LOCKING) */
      VOID(_ma_test_if_changed(info));

      info->lock_type=lock_type;
      info->invalidator=share->invalidator;
      share->w_locks++;
      share->tot_locks++;
      break;
    default:
      DBUG_ASSERT(0);
      break;                                    /* Impossible */
    }
  }
#ifdef __WIN__
  else
  {
    /*
       Check for bad file descriptors if this table is part
       of a merge union. Failing to capture this may cause
       a crash on windows if the table is renamed and
       later on referenced by the merge table.
     */
    if( info->owned_by_merge && (info->s)->kfile.file < 0 )
    {
      error = HA_ERR_NO_SUCH_TABLE;
    }
  }
#endif
  pthread_mutex_unlock(&share->intern_lock);
  DBUG_RETURN(error);
} /* maria_lock_database */


/****************************************************************************
 ** functions to read / write the state
****************************************************************************/

/**
  @brief Refresh the in-memory table state before an operation.

  Only does work when MARIA_EXTERNAL_LOCKING is defined; otherwise it is a
  no-op that returns 0 (hence the 'unused' attributes on the parameters).

  With MARIA_EXTERNAL_LOCKING and the handler unlocked (F_UNLCK): if the
  share has no locks at all the state is re-read from the index file, then,
  if check_keybuffer is set, _ma_test_if_changed() is called, and finally
  info->invalidator is copied from the share.

  @param info             Maria handler
  @param lock_type        Lock the caller needs; F_WRLCK is refused while the
                          handler holds F_RDLCK
  @param check_keybuffer  Non-zero to call _ma_test_if_changed()

  @return 0 on success
  @return 1 if the state could not be read (my_errno is set; it is forced to
            HA_ERR_FILE_TOO_SHORT if the read left it 0)
  @return -1 with my_errno= EACCES on a write request under a read lock
*/
int _ma_readinfo(register MARIA_HA *info __attribute__ ((unused)),
                 int lock_type __attribute__ ((unused)),
                 int check_keybuffer __attribute__ ((unused)))
{
#ifdef MARIA_EXTERNAL_LOCKING
  DBUG_ENTER("_ma_readinfo");

  if (info->lock_type == F_UNLCK)
  {
    MARIA_SHARE *share= info->s;
    if (!share->tot_locks)
    {
      /* should not be done for transactional tables */
      if (_ma_state_info_read_dsk(share->kfile.file, &share->state))
      {
        /* Make sure the caller always sees an error code */
        if (!my_errno)
          my_errno= HA_ERR_FILE_TOO_SHORT;
        DBUG_RETURN(1);
      }
    }
    if (check_keybuffer)
      VOID(_ma_test_if_changed(info));
    info->invalidator=share->invalidator;
  }
  else if (lock_type == F_WRLCK && info->lock_type == F_RDLCK)
  {
    my_errno=EACCES;                            /* Not allowed to change */
    DBUG_RETURN(-1);                            /* when have read_lock() */
  }
  DBUG_RETURN(0);
#else
  return 0;
#endif /* defined(MARIA_EXTERNAL_LOCKING) */
} /* _ma_readinfo */


/*
  Every isam-function that updates the isam-database MUST end with this
  request

  SYNOPSIS
    _ma_writeinfo()
    info        Maria handler
    operation   Non-zero asks for the state to be written, or, if that is
                not done here, for the share to be marked as changed

  NOTES
    my_errno is not changed if this succeeds!

    The state is written to the index file only when the share has no locks
    (tot_locks == 0) and the table was not born transactional; in every
    other case a non-zero 'operation' just sets share->changed.

  RETURN
    0      ok, or nothing had to be written
    other  the non-zero return value of _ma_state_info_write_sub(); my_errno
           is then left as set by that call
*/

int _ma_writeinfo(register MARIA_HA *info, uint operation)
{
  int error,olderror;
  MARIA_SHARE *share= info->s;
  DBUG_ENTER("_ma_writeinfo");
  DBUG_PRINT("info",("operation: %u  tot_locks: %u", operation,
                     share->tot_locks));

  error=0;
  if (share->tot_locks == 0 && !share->base.born_transactional)
  {
    /* transactional tables flush their state at Checkpoint */
    if (operation)
    {                                   /* Two threads can't be here */
      olderror= my_errno;               /* Remember last error */

#ifdef EXTERNAL_LOCKING
      /*
        The following only makes sense if we want to allow two different
        processes to access the same table at the same time
      */
      share->state.process= share->last_process=   share->this_process;
      share->state.unique=  info->last_unique=     info->this_unique;
      share->state.update_count= info->last_loop= ++info->this_loop;
#endif

      /* On failure, keep the new my_errno instead of the remembered one */
      if ((error=
           _ma_state_info_write_sub(share->kfile.file,
                                    &share->state,
                                    MA_STATE_INFO_WRITE_DONT_MOVE_OFFSET)))
        olderror=my_errno;
#ifdef __WIN__
      if (maria_flush)
      {
        _commit(share->kfile.file);
        _commit(info->dfile.file);
      }
#endif
      my_errno=olderror;
    }
  }
  else if (operation)
    share->changed= 1;                  /* Mark keyfile changed */
  DBUG_RETURN(error);
} /* _ma_writeinfo */


341 342 343 344
/*
  Test if an external process has changed the database
  (Should be called after readinfo)
*/
345 346 347

int _ma_test_if_changed(register MARIA_HA *info)
{
348
#ifdef EXTERNAL_LOCKING
349
  MARIA_SHARE *share= info->s;
350 351 352 353 354 355
  if (share->state.process != share->last_process ||
      share->state.unique  != info->last_unique ||
      share->state.update_count != info->last_loop)
  {						/* Keyfile has changed */
    DBUG_PRINT("info",("index file changed"));
    if (share->state.process != share->this_process)
unknown's avatar
unknown committed
356 357
      VOID(flush_pagecache_blocks(share->pagecache, &share->kfile,
                                  FLUSH_RELEASE));
358 359 360 361 362 363 364
    share->last_process=share->state.process;
    info->last_unique=	share->state.unique;
    info->last_loop=	share->state.update_count;
    info->update|=	HA_STATE_WRITTEN;	/* Must use file on next */
    info->data_changed= 1;			/* For maria_is_changed */
    return 1;
  }
365
#endif
366 367 368 369 370 371 372
  return (!(info->update & HA_STATE_AKTIV) ||
	  (info->update & (HA_STATE_WRITTEN | HA_STATE_DELETED |
			   HA_STATE_KEY_CHANGED)));
} /* _ma_test_if_changed */


/*
  Put a mark in the .MAI file that someone is updating the table

  SYNOPSIS
    _ma_mark_file_changed()
    info        Maria handler

  DOCUMENTATION
  state.open_count in the .MAI file is used the following way:
  - For the first change of the .MAI file in this process open_count is
    incremented by _ma_mark_file_changed(). (We have a write lock on the file
    when this happens)
  - In maria_close() it's decremented by _ma_decrement_open_count() if it
    was incremented in the same process.

  This means that if we are the only process using the file, the open_count
  tells us if the MARIA file wasn't properly closed. (This is true if
  my_disable_locking is set).

  open_count is not maintained on disk for temporary tables.

  RETURN
    0  ok (also when the table was already marked as changed)
    1  writing the open_count, setting the uuid or updating the state LSNs
       failed
*/

int _ma_mark_file_changed(MARIA_HA *info)
{
  uchar buff[3];                /* 2 bytes open_count + 1 byte changed mark */
  register MARIA_SHARE *share= info->s;
  DBUG_ENTER("_ma_mark_file_changed");

  if (!(share->state.changed & STATE_CHANGED) || ! share->global_changed)
  {
    share->state.changed|=(STATE_CHANGED | STATE_NOT_ANALYZED |
                           STATE_NOT_OPTIMIZED_KEYS);
    /* Count this share in open_count only once */
    if (!share->global_changed)
    {
      share->global_changed=1;
      share->state.open_count++;
    }
    /*
      Temp tables don't need an open_count as they are removed on crash.
      In theory transactional tables are fixed by log-based recovery, so don't
      need an open_count either, but if recovery has failed and logs have been
      removed (by maria-force-start-after-recovery-failures), we still need to
      detect dubious tables.
      If we didn't maintain open_count on disk for a table, after a crash
      we wouldn't know if it was closed at crash time (thus does not need a
      check) or not. So we would have to check all tables: overkill.
    */
    if (!share->temporary)
    {
      mi_int2store(buff,share->state.open_count);
      buff[2]=1;                                /* Mark that it's changed */
      if (my_pwrite(share->kfile.file, buff, sizeof(buff),
                    sizeof(share->state.header) +
                    MARIA_FILE_OPEN_COUNT_OFFSET,
                    MYF(MY_NABP)))
        DBUG_RETURN(1);
    }
    /* Set uuid of file if not yet set (zerofilled file) */
    if (share->base.born_transactional &&
        !(share->state.changed & STATE_NOT_MOVABLE))
    {
      /* Lock table to current installation */
      if (_ma_set_uuid(info, 0) ||
          (share->state.create_rename_lsn == LSN_NEEDS_NEW_STATE_LSNS &&
           _ma_update_state_lsns_sub(share, LSN_IMPOSSIBLE,
                                     trnman_get_min_trid(),
                                     TRUE, TRUE)))
        DBUG_RETURN(1);
      share->state.changed|= STATE_NOT_MOVABLE;
    }
  }
  DBUG_RETURN(0);
}

/*
  Scan a memory region for any non-zero byte

  SYNOPSIS
    _ma_check_if_zero()
    pos		First byte of the region to scan
    length	Number of bytes in the region

  RETURN
    0  every byte in the region is zero (also when length is 0)
    1  at least one byte is not zero

  NOTES
    Used mainly to detect rows with wrong extent information
*/

my_bool _ma_check_if_zero(uchar *pos, size_t length)
{
  uchar *cursor= pos;
  uchar *limit= pos + length;

  while (cursor < limit)
  {
    if (*cursor != 0)
      return 1;                                 /* Found a non-zero byte */
    cursor++;
  }
  return 0;
}
462 463 464 465 466 467 468 469

/*
  This is only called by close or by extra(HA_FLUSH) if the OS has the pwrite()
  call.  In these context the following code should be safe!
 */

int _ma_decrement_open_count(MARIA_HA *info)
{
470
  uchar buff[2];
471
  register MARIA_SHARE *share= info->s;
472 473 474 475 476 477 478 479 480 481
  int lock_error=0,write_error=0;
  if (share->global_changed)
  {
    uint old_lock=info->lock_type;
    share->global_changed=0;
    lock_error=maria_lock_database(info,F_WRLCK);
    /* Its not fatal even if we couldn't get the lock ! */
    if (share->state.open_count > 0)
    {
      share->state.open_count--;
unknown's avatar
unknown committed
482
      share->changed= 1;                        /* We have to update state */
483
      if (!share->temporary)
unknown's avatar
unknown committed
484 485
      {
        mi_int2store(buff,share->state.open_count);
unknown's avatar
unknown committed
486 487 488 489
        write_error= (int) my_pwrite(share->kfile.file, buff, sizeof(buff),
                                     sizeof(share->state.header) +
                                     MARIA_FILE_OPEN_COUNT_OFFSET,
                                     MYF(MY_NABP));
unknown's avatar
unknown committed
490
      }
491 492 493 494 495 496
    }
    if (!lock_error)
      lock_error=maria_lock_database(info,old_lock);
  }
  return test(lock_error || write_error);
}
497 498 499 500


/** @brief mark file as crashed */

unknown's avatar
unknown committed
501
void _ma_mark_file_crashed(MARIA_SHARE *share)
502 503 504 505 506 507
{
  uchar buff[2];
  DBUG_ENTER("_ma_mark_file_crashed");

  share->state.changed|= STATE_CRASHED;
  mi_int2store(buff, share->state.changed);
unknown's avatar
unknown committed
508 509 510 511 512 513 514 515 516
  /*
    We can ignore the errors, as if the mark failed, there isn't anything
    else we can do;  The user should already have got an error that the
    table was crashed.
  */
  (void) my_pwrite(share->kfile.file, buff, sizeof(buff),
                   sizeof(share->state.header) +
                   MARIA_FILE_CHANGED_OFFSET,
                   MYF(MY_NABP));
unknown's avatar
unknown committed
517
  DBUG_VOID_RETURN;
unknown's avatar
unknown committed
518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542
}


/**
   @brief Set the uuid of a Maria file

   Writes MY_UUID_SIZE bytes to the index file at the offset stored in
   state.header.base_pos.

   @fn _ma_set_uuid()
   @param info		Maria handler
   @param reset_uuid    Instead of setting file to maria_uuid, set it to
			0 to mark it as movable

   @return The result of my_pwrite() cast to my_bool.  The caller in this
           file treats non-zero as failure; presumably 0 means the write
           succeeded (MY_NABP convention -- confirm against mysys).
*/

my_bool _ma_set_uuid(MARIA_HA *info, my_bool reset_uuid)
{
  uchar buff[MY_UUID_SIZE], *uuid;

  uuid= maria_uuid;
  if (reset_uuid)
  {
    /* Write an all-zero uuid instead of this installation's uuid */
    bzero(buff, sizeof(buff));
    uuid= buff;
  }
  return (my_bool) my_pwrite(info->s->kfile.file, uuid, MY_UUID_SIZE,
                             mi_uint2korr(info->s->state.header.base_pos),
                             MYF(MY_NABP));
}