ha_innodb.cc 138 KB
Newer Older
unknown's avatar
unknown committed
1
/* Copyright (C) 2000 MySQL AB & Innobase Oy
2

3 4 5 6
   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.
7

8 9 10 11
   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.
12

13 14 15 16
   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */

unknown's avatar
unknown committed
17
/* This file defines the InnoDB handler: the interface between MySQL and
InnoDB
NOTE: You can only use noninlined InnoDB functions in this file, because we
have disabled the InnoDB inlining in this file. */
21 22 23 24 25 26

#ifdef __GNUC__
#pragma implementation				// gcc: Class implementation
#endif

#include "mysql_priv.h"
unknown's avatar
unknown committed
27
#include "slave.h"
unknown's avatar
unknown committed
28

29 30 31 32 33 34
#ifdef HAVE_INNOBASE_DB
#include <m_ctype.h>
#include <assert.h>
#include <hash.h>
#include <myisampack.h>

35 36
#define MAX_ULONG_BIT ((ulong) 1 << (sizeof(ulong)*8-1))

37
#include "ha_innodb.h"
unknown's avatar
unknown committed
38

unknown's avatar
unknown committed
39 40
pthread_mutex_t innobase_mutex;

41
/* Store MySQL definition of 'byte': in Linux it is char while InnoDB
42 43 44
uses unsigned char; the header univ.i which we include next defines
'byte' as a macro which expands to 'unsigned char' */

45
typedef byte	mysql_byte;
unknown's avatar
unknown committed
46

unknown's avatar
unknown committed
47 48
#define INSIDE_HA_INNOBASE_CC

49
/* Include necessary InnoDB headers */
50
extern "C" {
unknown's avatar
unknown committed
51
#include "../innobase/include/univ.i"
unknown's avatar
unknown committed
52
#include "../innobase/include/os0file.h"
unknown's avatar
unknown committed
53
#include "../innobase/include/os0thread.h"
unknown's avatar
unknown committed
54 55 56 57
#include "../innobase/include/srv0start.h"
#include "../innobase/include/srv0srv.h"
#include "../innobase/include/trx0roll.h"
#include "../innobase/include/trx0trx.h"
unknown's avatar
unknown committed
58
#include "../innobase/include/trx0sys.h"
unknown's avatar
unknown committed
59 60 61 62 63
#include "../innobase/include/row0ins.h"
#include "../innobase/include/row0mysql.h"
#include "../innobase/include/row0sel.h"
#include "../innobase/include/row0upd.h"
#include "../innobase/include/log0log.h"
64
#include "../innobase/include/lock0lock.h"
unknown's avatar
unknown committed
65 66 67
#include "../innobase/include/dict0crea.h"
#include "../innobase/include/btr0cur.h"
#include "../innobase/include/btr0btr.h"
68
#include "../innobase/include/fsp0fsp.h"
69
#include "../innobase/include/sync0sync.h"
70 71 72 73 74
}

#define HA_INNOBASE_ROWS_IN_TABLE 10000 /* to get optimization right */
#define HA_INNOBASE_RANGE_COUNT	  100

unknown's avatar
unknown committed
75
bool 	innodb_skip 		= 0;
76 77
uint 	innobase_init_flags 	= 0;
ulong 	innobase_cache_size 	= 0;
78

unknown's avatar
unknown committed
79 80 81
/* The default values for the following, type long, start-up parameters
are declared in mysqld.cc: */

82
long innobase_mirrored_log_groups, innobase_log_files_in_group,
83 84
     innobase_log_file_size, innobase_log_buffer_size,
     innobase_buffer_pool_size, innobase_additional_mem_pool_size,
unknown's avatar
Merge  
unknown committed
85
     innobase_file_io_threads, innobase_lock_wait_timeout,
unknown's avatar
unknown committed
86 87
     innobase_thread_concurrency, innobase_force_recovery;

unknown's avatar
unknown committed
88 89
/* The default values for the following char* start-up parameters
are determined in innobase_init below: */
unknown's avatar
unknown committed
90
  
unknown's avatar
unknown committed
91
char*	innobase_data_home_dir			= NULL;
unknown's avatar
unknown committed
92
char*	innobase_data_file_path 		= NULL;
unknown's avatar
unknown committed
93 94
char*	innobase_log_group_home_dir		= NULL;
char*	innobase_log_arch_dir			= NULL;
unknown's avatar
unknown committed
95 96
/* The following has a misleading name: starting from 4.0.5, this also
affects Windows: */
unknown's avatar
unknown committed
97 98 99 100 101
char*	innobase_unix_file_flush_method		= NULL;

/* Below we have boolean-valued start-up parameters, and their default
values */

unknown's avatar
unknown committed
102
uint	innobase_flush_log_at_trx_commit	= 1;
unknown's avatar
unknown committed
103 104 105
my_bool innobase_log_archive			= FALSE;
my_bool	innobase_use_native_aio			= FALSE;
my_bool	innobase_fast_shutdown			= TRUE;
106

unknown's avatar
unknown committed
107
static char *internal_innobase_data_file_path	= NULL;
108

109
/* The following counter is used to convey information to InnoDB
110 111 112 113 114
about server activity: in selects it is not sensible to call
srv_active_wake_master_thread after each fetch or search, we only do
it every INNOBASE_WAKE_INTERVAL'th step. */

#define INNOBASE_WAKE_INTERVAL	32
115
ulong	innobase_active_counter	= 0;
116 117 118

char*	innobase_home 	= NULL;

unknown's avatar
unknown committed
119 120
char    innodb_dummy_stmt_trx_handle = 'D';

unknown's avatar
unknown committed
121
static HASH 	innobase_open_tables;
122

123
static mysql_byte* innobase_get_key(INNOBASE_SHARE *share,uint *length,
124 125 126 127 128 129 130
			      my_bool not_used __attribute__((unused)));
static INNOBASE_SHARE *get_share(const char *table_name);
static void free_share(INNOBASE_SHARE *share);
static void innobase_print_error(const char* db_errpfx, char* buffer);

/* General functions */

unknown's avatar
unknown committed
131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164
/**********************************************************************
Calls srv_conc_enter_innodb() for the transaction, unless
srv_thread_concurrency is 500 or larger, in which case nothing is done.
The test is made here, in an inline function, to save the CPU time of the
call in that case. */
inline
void
innodb_srv_conc_enter_innodb(
/*=========================*/
	trx_t*	trx)	/* in: transaction handle */
{
	if (srv_thread_concurrency < 500) {

		srv_conc_enter_innodb(trx);
	}
}

/**********************************************************************
Calls srv_conc_exit_innodb() for the transaction, unless
srv_thread_concurrency is 500 or larger, in which case nothing is done.
The test is made here, in an inline function, to save the CPU time of the
call in that case. */
inline
void
innodb_srv_conc_exit_innodb(
/*========================*/
	trx_t*	trx)	/* in: transaction handle */
{
	if (srv_thread_concurrency < 500) {

		srv_conc_exit_innodb(trx);
	}
}

unknown's avatar
unknown committed
165
/**********************************************************************
unknown's avatar
unknown committed
166
Releases possible search latch and InnoDB thread FIFO ticket. These should
unknown's avatar
unknown committed
167 168 169 170
be released at each SQL statement end, and also when mysqld passes the
control to the client. It does no harm to release these also in the middle
of an SQL statement. */
inline
unknown's avatar
unknown committed
171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186
void
innobase_release_stat_resources(
/*============================*/
	trx_t*	trx)	/* in: transaction object */
{
	if (trx->has_search_latch) {
		trx_search_latch_release_if_reserved(trx);
	}

	if (trx->declared_to_be_inside_innodb) {
		/* Release our possible ticket in the FIFO */

		srv_conc_force_exit_innodb(trx);
	}
}

187 188 189 190 191 192 193 194 195 196 197 198 199
/************************************************************************
Call this function when mysqld passes control to the client. That is to
avoid deadlocks on the adaptive hash S-latch possibly held by thd. For more
documentation, see handler.cc. */

void
innobase_release_temporary_latches(
/*===============================*/
        void*   innobase_tid)
{
        innobase_release_stat_resources((trx_t*)innobase_tid);
}

200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216
/************************************************************************
Bumps innobase_active_counter by one, and whenever the new value is a
multiple of INNOBASE_WAKE_INTERVAL, calls srv_active_wake_master_thread.
Use this when a single database operation may introduce a small need for
server utility activity, like checkpointing. */
inline
void
innobase_active_small(void)
/*=======================*/
{
	if ((++innobase_active_counter % INNOBASE_WAKE_INTERVAL) == 0) {
		srv_active_wake_master_thread();
	}
}

217
/************************************************************************
unknown's avatar
unknown committed
218 219 220
Converts an InnoDB error code to a MySQL error code and also tells to MySQL
about a possible transaction rollback inside InnoDB caused by a lock wait
timeout or a deadlock. */
221 222 223 224 225
static
int
convert_error_code_to_mysql(
/*========================*/
			/* out: MySQL error code */
unknown's avatar
unknown committed
226 227
	int	error,	/* in: InnoDB error code */
	THD*	thd)	/* in: user thread handle or NULL */
228 229 230 231 232 233 234 235 236 237 238 239 240 241 242
{
	if (error == DB_SUCCESS) {

		return(0);

  	} else if (error == (int) DB_DUPLICATE_KEY) {

    		return(HA_ERR_FOUND_DUPP_KEY);

 	} else if (error == (int) DB_RECORD_NOT_FOUND) {

    		return(HA_ERR_NO_ACTIVE_RECORD);

 	} else if (error == (int) DB_ERROR) {

unknown's avatar
unknown committed
243
    		return(-1); /* unspecified error */
244 245

 	} else if (error == (int) DB_DEADLOCK) {
unknown's avatar
unknown committed
246
 		/* Since we rolled back the whole transaction, we must
unknown's avatar
unknown committed
247 248 249 250 251 252
 		tell it also to MySQL so that MySQL knows to empty the
 		cached binlog for this transaction */

 		if (thd) {
 			ha_rollback(thd);
 		}
253

254 255 256 257
    		return(HA_ERR_LOCK_DEADLOCK);

 	} else if (error == (int) DB_LOCK_WAIT_TIMEOUT) {

unknown's avatar
unknown committed
258
 		/* Since we rolled back the whole transaction, we must
unknown's avatar
unknown committed
259 260 261 262 263 264 265
 		tell it also to MySQL so that MySQL knows to empty the
 		cached binlog for this transaction */

 		if (thd) {
 			ha_rollback(thd);
 		}

unknown's avatar
Merge  
unknown committed
266
    		return(HA_ERR_LOCK_WAIT_TIMEOUT);
267 268 269

 	} else if (error == (int) DB_NO_REFERENCED_ROW) {

unknown's avatar
Merge  
unknown committed
270
    		return(HA_ERR_NO_REFERENCED_ROW);
271 272 273

 	} else if (error == (int) DB_ROW_IS_REFERENCED) {

unknown's avatar
Merge  
unknown committed
274
    		return(HA_ERR_ROW_IS_REFERENCED);
275

276
        } else if (error == (int) DB_CANNOT_ADD_CONSTRAINT) {
277

unknown's avatar
Merge  
unknown committed
278
    		return(HA_ERR_CANNOT_ADD_FOREIGN);
279

unknown's avatar
unknown committed
280 281
        } else if (error == (int) DB_CANNOT_DROP_CONSTRAINT) {

unknown's avatar
unknown committed
282
    		return(HA_ERR_ROW_IS_REFERENCED);
unknown's avatar
unknown committed
283

284 285
        } else if (error == (int) DB_COL_APPEARS_TWICE_IN_INDEX) {

unknown's avatar
unknown committed
286
    		return(HA_ERR_CRASHED);
287

288 289 290 291 292 293 294 295 296 297 298 299 300 301 302
 	} else if (error == (int) DB_OUT_OF_FILE_SPACE) {

    		return(HA_ERR_RECORD_FILE_FULL);

 	} else if (error == (int) DB_TABLE_IS_BEING_USED) {

    		return(HA_ERR_WRONG_COMMAND);

 	} else if (error == (int) DB_TABLE_NOT_FOUND) {

    		return(HA_ERR_KEY_NOT_FOUND);

  	} else if (error == (int) DB_TOO_BIG_RECORD) {

    		return(HA_ERR_TO_BIG_ROW);
unknown's avatar
unknown committed
303 304 305 306

  	} else if (error == (int) DB_CORRUPTION) {

    		return(HA_ERR_CRASHED);
unknown's avatar
unknown committed
307 308 309
  	} else if (error == (int) DB_NO_SAVEPOINT) {

    		return(HA_ERR_NO_SAVEPOINT);
310
    	} else {
unknown's avatar
unknown committed
311
    		return(-1);			// Unknown error
312 313 314
    	}
}

315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343
/*****************************************************************
If you want to print a thd that is not associated with the current thread,
you must call this function before reserving the InnoDB kernel_mutex, to
protect MySQL from setting thd->query NULL. If you print a thd of the current
thread, we know that MySQL cannot modify thd->query, and it is not necessary
to call this. Call innobase_mysql_end_print_arbitrary_thd() after you release
the kernel_mutex.
NOTE that /mysql/innobase/lock/lock0lock.c must contain the prototype for this
function! */
extern "C"
void
innobase_mysql_prepare_print_arbitrary_thd(void)
/*============================================*/
{
	/* Reserve the MySQL mutex LOCK_thread_count; it is released in
	innobase_mysql_end_print_arbitrary_thd() */
	VOID(pthread_mutex_lock(&LOCK_thread_count));
}

/*****************************************************************
Releases the mutex reserved by innobase_mysql_prepare_print_arbitrary_thd().
NOTE that /mysql/innobase/lock/lock0lock.c must contain the prototype for this
function! */
extern "C"
void
innobase_mysql_end_print_arbitrary_thd(void)
/*========================================*/
{
	/* Release the MySQL mutex LOCK_thread_count */
	VOID(pthread_mutex_unlock(&LOCK_thread_count));
}

344 345
/*****************************************************************
Prints info of a THD object (== user session thread) to the
unknown's avatar
unknown committed
346
standard output. NOTE that /mysql/innobase/trx/trx0trx.c must contain
347
the prototype for this function! */
348
extern "C"
349 350 351
void
innobase_mysql_print_thd(
/*=====================*/
352
	FILE*   f,	/* in: output stream */
353
        void*   input_thd)/* in: pointer to a MySQL THD object */
354
{
355 356
	const THD*	thd;
	const char*	s;
unknown's avatar
unknown committed
357
	char		buf[301];
358

359
        thd = (const THD*) input_thd;
360

361 362 363 364 365 366
  	fprintf(f, "MySQL thread id %lu, query id %lu",
		thd->thread_id, thd->query_id);
	if (thd->host) {
		putc(' ', f);
		fputs(thd->host, f);
	}
367

368 369 370 371
	if (thd->ip) {
		putc(' ', f);
		fputs(thd->ip, f);
	}
372

373
  	if (thd->user) {
374 375
		putc(' ', f);
		fputs(thd->user, f);
376 377
  	}

378
	if ((s = thd->proc_info)) {
379
		putc(' ', f);
380
		fputs(s, f);
381
	}
382

383
	if ((s = thd->query)) {
unknown's avatar
unknown committed
384
		/* determine the length of the query string */
unknown's avatar
unknown committed
385 386 387 388 389
		uint32 i, len;
		
		len = thd->query_length;

		if (len > 300) {
390
			len = 300;	/* ADDITIONAL SAFETY: print at most
unknown's avatar
unknown committed
391
					300 chars to reduce the probability of
392
					a seg fault if there is a race in
unknown's avatar
unknown committed
393 394 395
					thd->query_length in MySQL; after
					May 14, 2004 probably no race any more,
					but better be safe */
unknown's avatar
unknown committed
396 397
		}
		
398 399 400
                /* Use strmake to reduce the timeframe
                   for a race, compared to fwrite() */
		i= (uint) (strmake(buf, s, len) - buf);
unknown's avatar
unknown committed
401
		putc('\n', f);
unknown's avatar
unknown committed
402
		fwrite(buf, 1, i, f);
403
	}
404

405
	putc('\n', f);
406 407
}

408
/*************************************************************************
409 410
Gets the InnoDB transaction handle for a MySQL handler object, creates
an InnoDB transaction struct if the corresponding MySQL thread struct still
411
lacks one. */
412
static
413 414 415
trx_t*
check_trx_exists(
/*=============*/
416
			/* out: InnoDB transaction handle */
417 418 419 420
	THD*	thd)	/* in: user thread handle */
{
	trx_t*	trx;

unknown's avatar
unknown committed
421 422
	ut_a(thd == current_thd);

unknown's avatar
unknown committed
423
	trx = (trx_t*) thd->transaction.all.innobase_tid;
424 425

	if (trx == NULL) {
unknown's avatar
unknown committed
426
	        DBUG_ASSERT(thd != NULL);
427
		trx = trx_allocate_for_mysql();
428

429
		trx->mysql_thd = thd;
unknown's avatar
unknown committed
430 431
		trx->mysql_query_str = &((*thd).query);
		
unknown's avatar
unknown committed
432
		thd->transaction.all.innobase_tid = trx;
433

unknown's avatar
unknown committed
434
		/* The execution of a single SQL statement is denoted by
435
		a 'transaction' handle which is a dummy pointer: InnoDB
unknown's avatar
unknown committed
436 437
		remembers internally where the latest SQL statement
		started, and if error handling requires rolling back the
438
		latest statement, InnoDB does a rollback to a savepoint. */
unknown's avatar
unknown committed
439

unknown's avatar
unknown committed
440 441
		thd->transaction.stmt.innobase_tid =
		                  (void*)&innodb_dummy_stmt_trx_handle;
unknown's avatar
unknown committed
442
	} else {
unknown's avatar
unknown committed
443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459
		if (trx->magic_n != TRX_MAGIC_N) {
			mem_analyze_corruption((byte*)trx);

			ut_a(0);
		}
	}

	if (thd->options & OPTION_NO_FOREIGN_KEY_CHECKS) {
		trx->check_foreigns = FALSE;
	} else {
		trx->check_foreigns = TRUE;
	}

	if (thd->options & OPTION_RELAXED_UNIQUE_CHECKS) {
		trx->check_unique_secondary = FALSE;
	} else {
		trx->check_unique_secondary = TRUE;
460 461 462 463 464 465
	}

	return(trx);
}

/*************************************************************************
466
Updates the user_thd field in a handle and also allocates a new InnoDB
467 468
transaction handle if needed, and updates the transaction fields in the
prebuilt struct. */
469
inline
470 471 472 473 474 475
int
ha_innobase::update_thd(
/*====================*/
			/* out: 0 or error code */
	THD*	thd)	/* in: thd to use the handle */
{
476 477
	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;
	trx_t*		trx;
unknown's avatar
unknown committed
478
	
479 480
	trx = check_trx_exists(thd);

481
	if (prebuilt->trx != trx) {
482

483
		row_update_prebuilt_trx(prebuilt, trx);
484 485 486
	}

	user_thd = thd;
487

488 489 490
	return(0);
}

unknown's avatar
unknown committed
491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571

/*   BACKGROUND INFO: HOW THE MYSQL QUERY CACHE WORKS WITH INNODB
     ------------------------------------------------------------

1) The use of the query cache for TBL is disabled when there is an
uncommitted change to TBL.

2) When a change to TBL commits, InnoDB stores the current value of
its global trx id counter, let us denote it by INV_TRX_ID, to the table object
in the InnoDB data dictionary, and does only allow such transactions whose
id >= INV_TRX_ID to use the query cache.

3) When InnoDB does an INSERT/DELETE/UPDATE to a table TBL, or an implicit
modification because of an ON DELETE CASCADE, we invalidate the MySQL query
cache of TBL immediately.

How this is implemented inside InnoDB:

1) Since every modification always sets an IX type table lock on the InnoDB
table, it is easy to check if there can be uncommitted modifications for a
table: just check if there are locks in the lock list of the table.

2) When a transaction inside InnoDB commits, it reads the global trx id
counter and stores the value INV_TRX_ID to the tables on which it had a lock.

3) If there is an implicit table change from ON DELETE CASCADE or SET NULL,
InnoDB calls an invalidate method for the MySQL query cache for that table.

How this is implemented inside sql_cache.cc:

1) The query cache for an InnoDB table TBL is invalidated immediately at an
INSERT/UPDATE/DELETE, just like in the case of MyISAM. No need to delay
invalidation to the transaction commit.

2) To store or retrieve a value from the query cache of an InnoDB table TBL,
any query must first ask InnoDB's permission. We must pass the thd as a
parameter because InnoDB will look at the trx id, if any, associated with
that thd.

3) Use of the query cache for InnoDB tables is now allowed also when
AUTOCOMMIT==0 or we are inside BEGIN ... COMMIT. Thus transactions no longer
put restrictions on the use of the query cache.
*/

/**********************************************************************
The MySQL query cache uses this to check from InnoDB if the query cache at
the moment is allowed to operate on an InnoDB table. The SQL query must
be a non-locking SELECT.

The query cache is allowed to operate on certain query only if this function
returns TRUE for all tables in the query.

If thd is not in the autocommit state, this function also starts a new
transaction for thd if there is no active trx yet, and assigns a consistent
read view to it if there is no read view yet. (NOTE: that work is not done in
this function body itself; presumably it happens inside
row_search_check_if_query_cache_permitted().) */

my_bool
innobase_query_caching_of_table_permitted(
/*======================================*/
				/* out: TRUE if permitted, FALSE if not;
				note that the value FALSE does not mean
				we should invalidate the query cache:
				invalidation is called explicitly */
	THD*	thd,		/* in: thd of the user who is trying to
				store a result to the query cache or
				retrieve it */
	char*	full_name,	/* in: concatenation of database name,
				the null character '\0', and the table
				name */
	uint	full_name_len)	/* in: length of the full name, i.e.
				len(dbname) + len(tablename) + 1;
				must be less than 999 */
{
	ibool	is_autocommit;
	trx_t*	trx;
	char	norm_name[1000];
#ifdef __WIN__
	char*	ptr;
#endif

	ut_a(full_name_len < 999);

	if (thd->variables.tx_isolation == ISO_SERIALIZABLE) {
		/* In the SERIALIZABLE mode we add LOCK IN SHARE MODE to every
		plain SELECT if AUTOCOMMIT is not on. */

		return((my_bool)FALSE);
	}

	trx = (trx_t*) thd->transaction.all.innobase_tid;

	if (trx == NULL) {
		trx = check_trx_exists(thd);
	}

	innobase_release_stat_resources(trx);

	if (!(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) {

		is_autocommit = TRUE;
	} else {
		is_autocommit = FALSE;
	}

	if (is_autocommit && trx->conc_state == TRX_NOT_STARTED) {
		/* We are going to retrieve the query result from the
		query cache. This cannot be a store operation because then
		we would have started the trx already.

		We can imagine we instantaneously serialize
		this consistent read trx to the current trx id counter.
		If trx2 would have changed the tables of a query
		result stored in the cache, and trx2 would have already
		committed, making the result obsolete, then trx2 would have
		already invalidated the cache. Thus we can trust the result
		in the cache is ok for this query. */

		return((my_bool)TRUE);
	}

	/* Normalize the table name to InnoDB format */

	memcpy(norm_name, full_name, full_name_len);

	/* The first null char in the copy is the one which separates the
	database name from the table name: InnoDB uses '/' as the
	separator between db and table */

	norm_name[strlen(norm_name)] = '/';
	norm_name[full_name_len] = '\0';
#ifdef __WIN__
	/* Put to lower case. The cast to unsigned char keeps characters
	with the high bit set from being passed to tolower() as negative
	values. */

	ptr = norm_name;

	while (*ptr != '\0') {
		*ptr = (char) tolower((unsigned char) *ptr);
		ptr++;
	}
#endif
	if (row_search_check_if_query_cache_permitted(trx, norm_name)) {

		return((my_bool)TRUE);
	}

	return((my_bool)FALSE);
}

/*********************************************************************
Invalidates the MySQL query cache for the table.
NOTE that the exact prototype of this function has to be in
/innobase/row/row0ins.c! */
642
extern "C"
unknown's avatar
unknown committed
643 644 645 646 647
void
innobase_invalidate_query_cache(
/*============================*/
	trx_t*	trx,		/* in: transaction which modifies the table */
	char*	full_name,	/* in: concatenation of database name, null
unknown's avatar
unknown committed
648 649 650 651 652
				char '\0', table name, null char'\0';
				NOTE that in Windows this is always
				in LOWER CASE! */
	ulint	full_name_len)	/* in: full name length where also the null
				chars count */
unknown's avatar
unknown committed
653 654
{
	/* Argument TRUE below means we are using transactions */
655
#ifdef HAVE_QUERY_CACHE
unknown's avatar
unknown committed
656 657 658 659
	query_cache.invalidate((THD*)(trx->mysql_thd),
					(const char*)full_name,
					(uint32)full_name_len,
					TRUE);
660
#endif
unknown's avatar
unknown committed
661
}
662 663 664 665 666 667 668 669 670 671 672

/*********************************************************************
Get the quote character to be used in SQL identifiers. The backquote
character is always returned. */
extern "C"
char
mysql_get_identifier_quote_char(void)
/*=================================*/
				/* out: quote character to be
				used in SQL identifiers */
{
	return '`';
}

675 676 677 678
/*********************************************************************
Call this when you have opened a new table handle in HANDLER, before you
call index_read_idx() etc. Actually, we can let the cursor stay open even
over a transaction commit! Then you should call this before every operation,
679
fetch next etc. This function inits the necessary things even after a
680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699
transaction commit. */

void
ha_innobase::init_table_handle_for_HANDLER(void)
/*============================================*/
{
        row_prebuilt_t* prebuilt;

        /* If current thd does not yet have a trx struct, create one.
        If the current handle does not yet have a prebuilt struct, create
        one. Update the trx pointers in the prebuilt struct. Normally
        this operation is done in external_lock. */

        update_thd(current_thd);

        /* Initialize the prebuilt struct much like it would be inited in
        external_lock */

        prebuilt = (row_prebuilt_t*)innobase_prebuilt;

unknown's avatar
unknown committed
700 701
	innobase_release_stat_resources(prebuilt->trx);

702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727
        /* If the transaction is not started yet, start it */

        trx_start_if_not_started_noninline(prebuilt->trx);

        /* Assign a read view if the transaction does not have it yet */

        trx_assign_read_view(prebuilt->trx);

        /* We did the necessary inits in this function, no need to repeat them
        in row_search_for_mysql */

        prebuilt->sql_stat_start = FALSE;

        /* We let HANDLER always to do the reads as consistent reads, even
        if the trx isolation level would have been specified as SERIALIZABLE */

        prebuilt->select_lock_type = LOCK_NONE;

        /* Always fetch all columns in the index record */

        prebuilt->hint_no_need_to_fetch_extra_cols = FALSE;

        /* We want always to fetch all columns in the whole row? Or do
	we???? */

        prebuilt->read_just_key = FALSE;
728 729

	prebuilt->used_in_HANDLER = TRUE;
730 731
}

732
/*************************************************************************
733
Opens an InnoDB database. */
734

735
bool
736 737
innobase_init(void)
/*===============*/
738
			/* out: TRUE if error */
739
{
unknown's avatar
unknown committed
740
	static char current_dir[3];		// Set if using current lib
741 742
	int		err;
	bool		ret;
743
	char 	        *default_path;
unknown's avatar
merge  
unknown committed
744

745 746
  	DBUG_ENTER("innobase_init");

unknown's avatar
unknown committed
747
  	os_innodb_umask = (ulint)my_umask;
unknown's avatar
unknown committed
748

unknown's avatar
unknown committed
749 750 751 752 753 754 755 756
	/* First calculate the default path for innodb_data_home_dir etc.,
	in case the user has not given any value.

	Note that when using the embedded server, the datadirectory is not
	necessarily the current directory of this program. */

	if (mysql_embedded) {
		default_path = mysql_real_data_home;
757
		fil_path_to_mysql_datadir = mysql_real_data_home;
unknown's avatar
unknown committed
758 759 760 761 762 763
	} else {
	  	/* It's better to use current lib, to keep paths short */
	  	current_dir[0] = FN_CURLIB;
	  	current_dir[1] = FN_LIBCHAR;
	  	current_dir[2] = 0;
	  	default_path = current_dir;
unknown's avatar
unknown committed
764 765
	}

unknown's avatar
unknown committed
766 767
	ut_a(default_path);

unknown's avatar
unknown committed
768 769 770 771 772 773
	if (specialflag & SPECIAL_NO_PRIOR) {
	        srv_set_thread_priorities = FALSE;
	} else {
	        srv_set_thread_priorities = TRUE;
	        srv_query_thread_priority = QUERY_PRIOR;
	}
unknown's avatar
unknown committed
774 775 776
	
	/* Set InnoDB initialization parameters according to the values
	read from MySQL .cnf file */
unknown's avatar
unknown committed
777

unknown's avatar
unknown committed
778 779 780
	/*--------------- Data files -------------------------*/

	/* The default dir for data files is the datadir of MySQL */
unknown's avatar
unknown committed
781 782

	srv_data_home = (innobase_data_home_dir ? innobase_data_home_dir :
unknown's avatar
unknown committed
783
			 default_path);
unknown's avatar
unknown committed
784

unknown's avatar
unknown committed
785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800
	/* Set default InnoDB data file size to 10 MB and let it be
  	auto-extending. Thus users can use InnoDB in >= 4.0 without having
	to specify any startup options. */

	if (!innobase_data_file_path) {
  		innobase_data_file_path = (char*) "ibdata1:10M:autoextend";
	}

	/* Since InnoDB edits the argument in the next call, we make another
	copy of it: */

	internal_innobase_data_file_path = my_strdup(innobase_data_file_path,
						   MYF(MY_WME));

	ret = (bool) srv_parse_data_file_paths_and_sizes(
				internal_innobase_data_file_path,
unknown's avatar
unknown committed
801 802 803 804 805 806
				&srv_data_file_names,
				&srv_data_file_sizes,
				&srv_data_file_is_raw_partition,
				&srv_n_data_files,
				&srv_auto_extend_last_data_file,
				&srv_last_file_size_max);
807
	if (ret == FALSE) {
unknown's avatar
unknown committed
808 809 810
	  	sql_print_error(
			"InnoDB: syntax error in innodb_data_file_path");
	  	DBUG_RETURN(TRUE);
unknown's avatar
unknown committed
811
	}
812

unknown's avatar
unknown committed
813 814 815 816 817 818 819 820 821 822 823 824 825 826 827
	/* -------------- Log files ---------------------------*/

	/* The default dir for log files is the datadir of MySQL */
	
	if (!innobase_log_group_home_dir) {
	  	innobase_log_group_home_dir = default_path;
	}
	  	
	/* Since innodb_log_arch_dir has no relevance under MySQL,
	starting from 4.0.6 we always set it the same as
	innodb_log_group_home_dir: */

	innobase_log_arch_dir = innobase_log_group_home_dir;

	srv_arch_dir = innobase_log_arch_dir;
unknown's avatar
unknown committed
828

unknown's avatar
unknown committed
829 830 831
	ret = (bool)
		srv_parse_log_group_home_dirs(innobase_log_group_home_dir,
						&srv_log_group_home_dirs);
unknown's avatar
unknown committed
832

unknown's avatar
unknown committed
833 834 835 836
	if (ret == FALSE || innobase_mirrored_log_groups != 1) {
		fprintf(stderr,
		"InnoDB: syntax error in innodb_log_group_home_dir\n"
		"InnoDB: or a wrong number of mirrored log groups\n");
unknown's avatar
unknown committed
837

unknown's avatar
unknown committed
838
		DBUG_RETURN(TRUE);
unknown's avatar
unknown committed
839
	}
unknown's avatar
unknown committed
840

unknown's avatar
unknown committed
841 842 843
	/* --------------------------------------------------*/

	srv_file_flush_method_str = innobase_unix_file_flush_method;
844

unknown's avatar
unknown committed
845
	srv_n_log_groups = (ulint) innobase_mirrored_log_groups;
846
	srv_n_log_files = (ulint) innobase_log_files_in_group;
unknown's avatar
unknown committed
847 848 849 850
	srv_log_file_size = (ulint) innobase_log_file_size;

	srv_log_archive_on = (ulint) innobase_log_archive;
	srv_log_buffer_size = (ulint) innobase_log_buffer_size;
unknown's avatar
unknown committed
851
	srv_flush_log_at_trx_commit = (ulint) innobase_flush_log_at_trx_commit;
852

unknown's avatar
unknown committed
853
	srv_pool_size = (ulint) innobase_buffer_pool_size;
unknown's avatar
unknown committed
854

unknown's avatar
unknown committed
855 856 857
	srv_mem_pool_size = (ulint) innobase_additional_mem_pool_size;

	srv_n_file_io_threads = (ulint) innobase_file_io_threads;
858

859
	srv_lock_wait_timeout = (ulint) innobase_lock_wait_timeout;
unknown's avatar
Merge  
unknown committed
860 861 862 863
	srv_thread_concurrency = (ulint) innobase_thread_concurrency;
	srv_force_recovery = (ulint) innobase_force_recovery;

	srv_fast_shutdown = (ibool) innobase_fast_shutdown;
864

865
	srv_print_verbose_log = mysql_embedded ? 0 : 1;
unknown's avatar
unknown committed
866

867
	if (strcmp(default_charset_info->name, "latin1") == 0) {
unknown's avatar
unknown committed
868

869 870 871 872
		/* Store the character ordering table to InnoDB.
		For non-latin1 charsets we use the MySQL comparison
		functions, and consequently we do not need to know
		the ordering internally in InnoDB. */
unknown's avatar
unknown committed
873

874 875 876
		memcpy(srv_latin1_ordering,
				default_charset_info->sort_order, 256);
	}
877

878 879 880 881 882 883 884 885
	/* Since we in this module access directly the fields of a trx
        struct, and due to different headers and flags it might happen that
	mutex_t has a different size in this module and in InnoDB
	modules, we check at run time that the size is the same in
	these compilation modules. */

	srv_sizeof_trx_t_in_ha_innodb_cc = sizeof(trx_t);

unknown's avatar
unknown committed
886
	err = innobase_start_or_create_for_mysql();
887 888 889

	if (err != DB_SUCCESS) {

unknown's avatar
unknown committed
890
		DBUG_RETURN(1);
891
	}
unknown's avatar
unknown committed
892

893
	(void) hash_init(&innobase_open_tables,32,0,0,
unknown's avatar
unknown committed
894
			 		(hash_get_key) innobase_get_key,0,0);
895
	pthread_mutex_init(&innobase_mutex,MY_MUTEX_INIT_FAST);
unknown's avatar
unknown committed
896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911

	/* If this is a replication slave and we needed to do a crash recovery,
	set the master binlog position to what InnoDB internally knew about
	how far we got transactions durable inside InnoDB. There is a
	problem here: if the user used also MyISAM tables, InnoDB might not
	know the right position for them.

	THIS DOES NOT WORK CURRENTLY because replication seems to initialize
	glob_mi also after innobase_init. */
	
/*	if (trx_sys_mysql_master_log_pos != -1) {
		ut_memcpy(glob_mi.log_file_name, trx_sys_mysql_master_log_name,
				1 + ut_strlen(trx_sys_mysql_master_log_name));
		glob_mi.pos = trx_sys_mysql_master_log_pos;
	}
*/
unknown's avatar
unknown committed
912
  	DBUG_RETURN(0);
913 914 915
}

/***********************************************************************
916
Closes an InnoDB database. */
917

918
bool
919 920
innobase_end(void)
/*==============*/
921
				/* out: TRUE if error */
922 923 924 925 926 927
{
	int	err;

	DBUG_ENTER("innobase_end");

	err = innobase_shutdown_for_mysql();
928
	hash_free(&innobase_open_tables);
unknown's avatar
unknown committed
929
	my_free(internal_innobase_data_file_path,MYF(MY_ALLOW_ZERO_PTR));
unknown's avatar
unknown committed
930
	pthread_mutex_destroy(&innobase_mutex);
931 932 933

	if (err != DB_SUCCESS) {

unknown's avatar
unknown committed
934
	  DBUG_RETURN(1);
935
	}
936

unknown's avatar
unknown committed
937
  	DBUG_RETURN(0);
938 939 940
}

/********************************************************************
941
Flushes InnoDB logs to disk and makes a checkpoint. Really, a commit
942 943
flushes logs, and the name of this function should be innobase_checkpoint. */

944
bool
945 946
innobase_flush_logs(void)
/*=====================*/
947
				/* out: TRUE if error */
948
{
949
  	bool 	result = 0;
950 951 952

  	DBUG_ENTER("innobase_flush_logs");

unknown's avatar
unknown committed
953
	log_buffer_flush_to_disk();
954

955 956 957
  	DBUG_RETURN(result);
}

958
/*************************************************************************
959
Gets the free space in an InnoDB database: returned in units of kB. */
960 961 962 963 964 965 966 967 968

uint
innobase_get_free_space(void)
/*=========================*/
			/* out: free space in kB */
{
	return((uint) fsp_get_available_space_in_free_extents(0));
}

969
/*********************************************************************
970
Commits a transaction in an InnoDB database. */
971

unknown's avatar
unknown committed
972 973 974 975 976
void
innobase_commit_low(
/*================*/
	trx_t*	trx)	/* in: transaction handle */
{
unknown's avatar
unknown committed
977 978 979 980 981
	if (trx->conc_state == TRX_NOT_STARTED) {

	        return;
	}

982 983
        if (current_thd->slave_thread) {
                /* Update the replication position info inside InnoDB */
unknown's avatar
unknown committed
984

985 986
                trx->mysql_master_log_file_name
                                        = active_mi->rli.master_log_name;
unknown's avatar
unknown committed
987 988 989 990 991 992
                trx->mysql_master_log_pos = (ib_longlong)
#if MYSQL_VERSION_ID < 40100
                  (active_mi->rli.future_master_log_pos);
#else
                  (active_mi->rli.future_group_master_log_pos);
#endif
993
        }
unknown's avatar
unknown committed
994 995

	trx_commit_for_mysql(trx);
unknown's avatar
unknown committed
996 997 998
}

/*********************************************************************
unknown's avatar
unknown committed
999 1000
Commits a transaction in an InnoDB database or marks an SQL statement
ended. */
unknown's avatar
unknown committed
1001

1002 1003 1004
int
innobase_commit(
/*============*/
unknown's avatar
unknown committed
1005
			/* out: 0 */
unknown's avatar
unknown committed
1006
	THD*	thd,	/* in: MySQL thread handle of the user for whom
1007
			the transaction should be committed */
1008 1009
	void*	trx_handle)/* in: InnoDB trx handle or
			&innodb_dummy_stmt_trx_handle: the latter means
unknown's avatar
unknown committed
1010 1011
			that the current SQL statement ended, and we should
			mark the start of a new statement with a savepoint */
1012
{
1013
	trx_t*	trx;
1014 1015 1016 1017

  	DBUG_ENTER("innobase_commit");
  	DBUG_PRINT("trans", ("ending transaction"));

unknown's avatar
unknown committed
1018
	trx = check_trx_exists(thd);
1019

unknown's avatar
unknown committed
1020 1021 1022 1023 1024 1025
	/* Release a possible FIFO ticket and search latch. Since we will
	reserve the kernel mutex, we have to release the search system latch
	first to obey the latching order. */

	innobase_release_stat_resources(trx);

unknown's avatar
unknown committed
1026 1027 1028 1029 1030 1031
	/* The flag thd->transaction.all.innodb_active_trans is set to 1 in
	::external_lock, ::start_stmt, and innobase_savepoint, and it is only
	set to 0 in a commit or a rollback. If it is 0 we know there cannot be
	resources to be freed and we could return immediately. For the time
	being we play safe and do the cleanup though there should be nothing
	to clean up. */
unknown's avatar
unknown committed
1032

unknown's avatar
unknown committed
1033 1034 1035 1036 1037 1038
	if (thd->transaction.all.innodb_active_trans == 0
	    && trx->conc_state != TRX_NOT_STARTED) {
	    
	        fprintf(stderr,
"InnoDB: Error: thd->transaction.all.innodb_active_trans == 0\n"
"InnoDB: but trx->conc_state != TRX_NOT_STARTED\n");
unknown's avatar
unknown committed
1039 1040
	}

unknown's avatar
unknown committed
1041 1042 1043
	if (trx_handle != (void*)&innodb_dummy_stmt_trx_handle
	    || (!(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)))) {
	        
unknown's avatar
unknown committed
1044
		innobase_commit_low(trx);
unknown's avatar
unknown committed
1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058

		thd->transaction.all.innodb_active_trans = 0;
	} else {
		if (trx->auto_inc_lock) {
			/* If we had reserved the auto-inc lock for some
			table in this SQL statement we release it now */
		  	
			row_unlock_table_autoinc_for_mysql(trx);
		}
		/* Store the current undo_no of the transaction so that we
		know where to roll back if we have to roll back the next
		SQL statement */

		trx_mark_sql_stat_end(trx);
unknown's avatar
unknown committed
1059
	}
1060

unknown's avatar
unknown committed
1061 1062
	/* Tell the InnoDB server that there might be work for utility
	threads: */
1063 1064 1065

	srv_active_wake_master_thread();

unknown's avatar
unknown committed
1066
	DBUG_RETURN(0);
1067 1068
}

1069 1070 1071 1072
/*********************************************************************
This is called when MySQL writes the binlog entry for the current
transaction. Writes to the InnoDB tablespace info which tells where the
MySQL binlog entry for the current transaction ended. Also commits the
unknown's avatar
unknown committed
1073 1074 1075 1076
transaction inside InnoDB but does NOT flush InnoDB log files to disk.
To flush you have to call innobase_flush_log_to_disk. We have separated
flushing to eliminate the bottleneck of LOCK_log in log.cc which disabled
InnoDB's group commit capability. */
1077 1078 1079 1080

int
innobase_report_binlog_offset_and_commit(
/*=====================================*/
unknown's avatar
unknown committed
1081
                                /* out: 0 */
1082
        THD*    thd,            /* in: user thread */
unknown's avatar
unknown committed
1083
        void*   trx_handle,     /* in: InnoDB trx handle */
1084 1085
        char*   log_file_name,  /* in: latest binlog file name */
        my_off_t end_offset)    /* in: the offset in the binlog file
unknown's avatar
unknown committed
1086
                                   up to which we wrote */
1087
{
unknown's avatar
unknown committed
1088 1089 1090
	trx_t*	trx;

	trx = (trx_t*)trx_handle;
1091

unknown's avatar
unknown committed
1092 1093
	ut_a(trx != NULL);

unknown's avatar
unknown committed
1094 1095 1096
	trx->mysql_log_file_name = log_file_name;  	
	trx->mysql_log_offset = (ib_longlong)end_offset;
	
unknown's avatar
unknown committed
1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129
	trx->flush_log_later = TRUE;

  	innobase_commit(thd, trx_handle);

	trx->flush_log_later = FALSE;

	return(0);
}

/*********************************************************************
Called after MySQL has written the binlog entry for the current
transaction. Completes the commit through trx_commit_complete_for_mysql(),
which flushes the InnoDB log files to disk if required. The call is
skipped altogether when srv_flush_log_at_trx_commit is 0. */

int
innobase_commit_complete(
/*=====================*/
				/* out: 0 */
	void*	trx_handle)	/* in: InnoDB trx handle */
{
	trx_t*	trx = (trx_t*) trx_handle;

	if (srv_flush_log_at_trx_commit != 0) {

		ut_a(trx != NULL);

		trx_commit_complete_for_mysql(trx);
	}

	return(0);
}

1132
/*********************************************************************
unknown's avatar
unknown committed
1133
Rolls back a transaction or the latest SQL statement. */
1134 1135 1136 1137 1138

int
innobase_rollback(
/*==============*/
			/* out: 0 or error number */
unknown's avatar
unknown committed
1139
	THD*	thd,	/* in: handle to the MySQL thread of the user
1140
			whose transaction should be rolled back */
1141 1142 1143
	void*	trx_handle)/* in: InnoDB trx handle or a dummy stmt handle;
			the latter means we roll back the latest SQL
			statement */
1144 1145
{
	int	error = 0;
1146
	trx_t*	trx;
1147

1148 1149 1150
	DBUG_ENTER("innobase_rollback");
	DBUG_PRINT("trans", ("aborting transaction"));

1151
	trx = check_trx_exists(thd);
1152

unknown's avatar
unknown committed
1153 1154 1155 1156 1157 1158
	/* Release a possible FIFO ticket and search latch. Since we will
	reserve the kernel mutex, we have to release the search system latch
	first to obey the latching order. */

	innobase_release_stat_resources(trx);

unknown's avatar
unknown committed
1159
        if (trx->auto_inc_lock) {
unknown's avatar
unknown committed
1160 1161 1162 1163
		/* If we had reserved the auto-inc lock for some table (if
		we come here to roll back the latest SQL statement) we
		release it now before a possibly lengthy rollback */
		
unknown's avatar
unknown committed
1164 1165 1166
		row_unlock_table_autoinc_for_mysql(trx);
	}

unknown's avatar
unknown committed
1167 1168
	if (trx_handle != (void*)&innodb_dummy_stmt_trx_handle
	    || (!(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)))) {
unknown's avatar
Merge  
unknown committed
1169

1170
		error = trx_rollback_for_mysql(trx);
unknown's avatar
unknown committed
1171
		thd->transaction.all.innodb_active_trans = 0;
unknown's avatar
unknown committed
1172
	} else {
1173
		error = trx_rollback_last_sql_stat_for_mysql(trx);
unknown's avatar
unknown committed
1174
	}
1175

unknown's avatar
unknown committed
1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201
	DBUG_RETURN(convert_error_code_to_mysql(error, NULL));
}

/*********************************************************************
Rolls back a transaction to a savepoint and reports the binlog cache
position which corresponds to that savepoint. */

int
innobase_rollback_to_savepoint(
/*===========================*/
				/* out: 0 if success, HA_ERR_NO_SAVEPOINT if
				no savepoint with the given name */
	THD*	thd,		/* in: handle to the MySQL thread of the user
				whose transaction should be rolled back */
	char*	savepoint_name,	/* in: savepoint name */
	my_off_t* binlog_cache_pos)/* out: position which corresponds to the
				savepoint in the binlog cache of this
				transaction, not defined if error */
{
	/* Initialized because the value is copied to *binlog_cache_pos
	unconditionally below: if the rollback call fails without storing
	a position, we must not read an indeterminate value. */

	ib_longlong mysql_binlog_cache_pos = 0;
	int	    error = 0;
	trx_t*	    trx;

	DBUG_ENTER("innobase_rollback_to_savepoint");

	trx = check_trx_exists(thd);

	/* Release a possible FIFO ticket and search latch. Since we will
	reserve the kernel mutex, we have to release the search system latch
	first to obey the latching order. */

	innobase_release_stat_resources(trx);

	error = trx_rollback_to_savepoint_for_mysql(trx, savepoint_name,
						&mysql_binlog_cache_pos);

	/* The caller must not rely on this value if error != 0 */

	*binlog_cache_pos = (my_off_t)mysql_binlog_cache_pos;

	DBUG_RETURN(convert_error_code_to_mysql(error, NULL));
}

unknown's avatar
unknown committed
1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243
/*********************************************************************
Sets a savepoint with the given name in the transaction of the thread. In
the autocommit state nothing is done. */

int
innobase_savepoint(
/*===============*/
				/* out: always 0, that is, always succeeds */
	THD*	thd,		/* in: handle to the MySQL thread */
	char*	savepoint_name,	/* in: savepoint name */
	my_off_t binlog_cache_pos)/* in: offset up to which the current
				transaction has cached log entries to its
				binlog cache, not defined if no transaction
				active, or we are in the autocommit state, or
				binlogging is not switched on */
{
	DBUG_ENTER("innobase_savepoint");

	if (thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {
		trx_t*	trx;
		int	error;

		trx = check_trx_exists(thd);

		/* The kernel mutex will be reserved below: to obey the
		latching order, a possible FIFO ticket and the search
		system latch must be released before that. */

		innobase_release_stat_resources(trx);

		/* A savepoint allocates resources inside InnoDB (memory
		to store the savepoint name, for example), and therefore
		setting one starts a transaction there */

		thd->transaction.all.innodb_active_trans = 1;

		error = trx_savepoint_for_mysql(trx, savepoint_name,
					(ib_longlong) binlog_cache_pos);

		DBUG_RETURN(convert_error_code_to_mysql(error, NULL));
	}

	/* Autocommit state: a savepoint makes no sense here, and we
	return immediate success */

	DBUG_RETURN(0);
}

1262
/*********************************************************************
unknown's avatar
unknown committed
1263
Frees a possible InnoDB trx object associated with the current THD. */
1264 1265 1266 1267

int
innobase_close_connection(
/*======================*/
unknown's avatar
unknown committed
1268 1269 1270
                        /* out: 0 or error number */
        THD*    thd)    /* in: handle to the MySQL thread of the user
                        whose transaction should be rolled back */
1271
{
unknown's avatar
unknown committed
1272
        trx_t*  trx;
unknown's avatar
unknown committed
1273

unknown's avatar
unknown committed
1274
        trx = (trx_t*)thd->transaction.all.innobase_tid;
1275

unknown's avatar
unknown committed
1276 1277 1278 1279 1280 1281 1282 1283 1284
        if (NULL != trx) {
                innobase_rollback(thd, (void*)trx);

                trx_free_for_mysql(trx);

                thd->transaction.all.innobase_tid = NULL;
        }

        return(0);
1285
}
1286 1287

/*****************************************************************************
1288
** InnoDB database tables
1289 1290 1291
*****************************************************************************/

/********************************************************************
1292 1293 1294
This function is not relevant since we store the tables and indexes
into our own tablespace, not as files, whose extension this function would
give. */
1295 1296 1297 1298

const char**
ha_innobase::bas_ext() const
/*========================*/
1299 1300
				/* out: file extension strings, currently not
				used */
1301
{
1302
	static const char* ext[] = {".InnoDB", NullS};
1303

1304 1305 1306
	return(ext);
}

1307 1308 1309
/*********************************************************************
Normalizes a table name string. A normalized name consists of the
database name catenated to '/' and table name. An example:
test/mytable. Both '/' and '\\' are accepted as separators in the input;
everything before the database name component is dropped. If the input
contains no separator at all, it is copied unchanged. On Windows
normalization puts both the database name and the table name always to
lower case. */
static
void
normalize_table_name(
/*=================*/
	char*		norm_name,	/* out: normalized name as a
					null-terminated string; the buffer
					must have room for at least
					strlen(name) + 1 bytes */
	const char*	name)		/* in: table name string */
{
	size_t	len = strlen(name);
	size_t	name_pos;	/* index of the first char of the table
				name component */
	size_t	db_pos;		/* index of the first char of the database
				name component */

	/* Scan name from the end. Indexes are used instead of pointers so
	that we never step to a position before the start of the string. */

	name_pos = len;

	while (name_pos > 0
	       && name[name_pos - 1] != '\\'
	       && name[name_pos - 1] != '/') {
		name_pos--;
	}

	if (name_pos == 0) {
		/* No separator: there is no database name component to
		extract. Copy the string as it is, including the
		terminating null. */

		memcpy(norm_name, name, len + 1);
	} else {
		/* name[name_pos - 1] is the separator between the database
		name and the table name: continue the scan from there to
		find where the database name starts */

		db_pos = name_pos - 1;

		while (db_pos > 0
		       && name[db_pos - 1] != '\\'
		       && name[db_pos - 1] != '/') {
			db_pos--;
		}

		/* Copy "<db><separator><table>" with the terminating null,
		and force the separator to be '/' */

		memcpy(norm_name, name + db_pos, len + 1 - db_pos);

		norm_name[name_pos - db_pos - 1] = '/';
	}

#ifdef __WIN__
	{
		char*	ptr;

		/* Put to lower case. The cast keeps the argument
		non-negative in case tolower is the standard ctype
		function and char is signed. */

		for (ptr = norm_name; *ptr != '\0'; ptr++) {
			*ptr = tolower((unsigned char) *ptr);
		}
	}
#endif
}
1359

1360
/*********************************************************************
unknown's avatar
unknown committed
1361
Creates and opens a handle to a table which already exists in an InnoDB
1362 1363 1364 1365 1366 1367 1368 1369
database. */

int
ha_innobase::open(
/*==============*/
					/* out: 1 if error, 0 if success */
	const char*	name,		/* in: table name */
	int 		mode,		/* in: not used */
unknown's avatar
unknown committed
1370
	uint 		test_if_locked)	/* in: not used */
1371
{
1372 1373 1374
	dict_table_t*	ib_table;
  	int 		error	= 0;
  	char		norm_name[1000];
1375 1376 1377 1378 1379 1380

	DBUG_ENTER("ha_innobase::open");

	UT_NOT_USED(mode);
	UT_NOT_USED(test_if_locked);

1381 1382
	normalize_table_name(norm_name, name);

1383 1384
	user_thd = NULL;

unknown's avatar
unknown committed
1385 1386
	last_query_id = (ulong)-1;

unknown's avatar
unknown committed
1387 1388 1389
	active_index = 0;
	active_index_before_scan = (uint)-1; /* undefined value */

1390 1391 1392
	if (!(share=get_share(name)))
	  DBUG_RETURN(1);

1393 1394 1395 1396
	/* Create buffers for packing the fields of a record. Why
	table->reclength did not work here? Obviously, because char
	fields when packed actually became 1 byte longer, when we also
	stored the string length as the first byte. */
1397

unknown's avatar
unknown committed
1398
	upd_and_key_val_buff_len = table->reclength + table->max_key_length
1399
							+ MAX_REF_PARTS * 3;
1400
	if (!(mysql_byte*) my_multi_malloc(MYF(MY_WME),
unknown's avatar
unknown committed
1401 1402
				     &upd_buff, upd_and_key_val_buff_len,
				     &key_val_buff, upd_and_key_val_buff_len,
1403
				     NullS)) {
1404
	  	free_share(share);
1405
	  	DBUG_RETURN(1);
1406 1407
  	}

1408
	/* Get pointer to a table object in InnoDB dictionary cache */
1409

1410 1411 1412
	ib_table = dict_table_get_and_increment_handle_count(
				      		     norm_name, NULL);
 	if (NULL == ib_table) {
unknown's avatar
unknown committed
1413 1414 1415 1416 1417 1418 1419 1420 1421
	        ut_print_timestamp(stderr);
	        fprintf(stderr, "  InnoDB error:\n"
"Cannot find table %s from the internal data dictionary\n"
"of InnoDB though the .frm file for the table exists. Maybe you\n"
"have deleted and recreated InnoDB data files but have forgotten\n"
"to delete the corresponding .frm files of InnoDB tables, or you\n"
"have moved .frm files to another database?\n"
"Look from section 15.1 of http://www.innodb.com/ibman.html\n"
"how you can resolve the problem.\n",
1422
			  norm_name);
1423

1424
	        free_share(share);
1425
    		my_free((char*) upd_buff, MYF(0));
1426 1427 1428 1429
    		my_errno = ENOENT;
    		DBUG_RETURN(1);
  	}

1430
	innobase_prebuilt = row_create_prebuilt(ib_table);
1431

1432
	((row_prebuilt_t*)innobase_prebuilt)->mysql_row_len = table->reclength;
1433

unknown's avatar
unknown committed
1434 1435 1436 1437
	/* Looks like MySQL-3.23 sometimes has primary key number != 0 */

 	primary_key = table->primary_key;
	key_used_on_scan = primary_key;
1438

unknown's avatar
unknown committed
1439 1440 1441 1442 1443
	/* Allocate a buffer for a 'row reference'. A row reference is
	a string of bytes of length ref_length which uniquely specifies
        a row in our table. Note that MySQL may also compare two row
        references for equality by doing a simple memcmp on the strings
        of length ref_length! */
1444

unknown's avatar
unknown committed
1445
  	if (!row_table_got_default_clust_index(ib_table)) {
unknown's avatar
unknown committed
1446 1447 1448 1449 1450
	        if (primary_key >= MAX_KEY) {
	                fprintf(stderr,
		    "InnoDB: Error: table %s has a primary key in InnoDB\n"
		    "InnoDB: data dictionary, but not in MySQL!\n", name);
		}
1451 1452 1453

		((row_prebuilt_t*)innobase_prebuilt)
				->clust_index_was_generated = FALSE;
unknown's avatar
unknown committed
1454
 		/*
unknown's avatar
unknown committed
1455 1456 1457 1458 1459
		  MySQL allocates the buffer for ref. key_info->key_length
		  includes space for all key columns + one byte for each column
		  that may be NULL. ref_length must be as exact as possible to
		  save space, because all row reference buffers are allocated
		  based on ref_length.
unknown's avatar
unknown committed
1460
		*/
unknown's avatar
unknown committed
1461
 
unknown's avatar
unknown committed
1462
  		ref_length = table->key_info[primary_key].key_length;
1463
	} else {
unknown's avatar
unknown committed
1464 1465 1466
	        if (primary_key != MAX_KEY) {
	                fprintf(stderr,
		    "InnoDB: Error: table %s has no primary key in InnoDB\n"
unknown's avatar
unknown committed
1467 1468 1469 1470 1471 1472 1473 1474
		    "InnoDB: data dictionary, but has one in MySQL!\n"
		    "InnoDB: If you created the table with a MySQL\n"
                    "InnoDB: version < 3.23.54 and did not define a primary\n"
                    "InnoDB: key, but defined a unique key with all non-NULL\n"
                    "InnoDB: columns, then MySQL internally treats that key\n"
                    "InnoDB: as the primary key. You can fix this error by\n"
		    "InnoDB: dump + DROP + CREATE + reimport of the table.\n",
				name);
unknown's avatar
unknown committed
1475 1476
		}

1477 1478 1479
		((row_prebuilt_t*)innobase_prebuilt)
				->clust_index_was_generated = TRUE;

unknown's avatar
unknown committed
1480
  		ref_length = DATA_ROW_ID_LEN;
unknown's avatar
unknown committed
1481

unknown's avatar
unknown committed
1482 1483 1484 1485 1486 1487 1488 1489 1490
		/*
		  If we automatically created the clustered index, then
		  MySQL does not know about it, and MySQL must NOT be aware
		  of the index used on scan, to make it avoid checking if we
		  update the column of the index. That is why we assert below
		  that key_used_on_scan is the undefined value MAX_KEY.
		  The column is the row id in the automatical generation case,
		  and it will never be updated anyway.
		*/
unknown's avatar
unknown committed
1491 1492 1493 1494 1495 1496 1497
	       
		if (key_used_on_scan != MAX_KEY) {
	                fprintf(stderr,
"InnoDB: Warning: table %s key_used_on_scan is %lu even though there is no\n"
"InnoDB: primary key inside InnoDB.\n",
				name, (ulint)key_used_on_scan);
		}
1498
	}
1499

unknown's avatar
Merge  
unknown committed
1500 1501
	auto_inc_counter_for_this_stat = 0;

unknown's avatar
unknown committed
1502 1503 1504
	block_size = 16 * 1024;	/* Index block size in InnoDB: used by MySQL
				in query optimization */

unknown's avatar
Merge  
unknown committed
1505
	/* Init table lock structure */
1506
	thr_lock_data_init(&share->lock,&lock,(void*) 0);
1507 1508

  	info(HA_STATUS_NO_LOCK | HA_STATUS_VARIABLE | HA_STATUS_CONST);
1509

1510 1511 1512 1513 1514 1515 1516 1517 1518 1519
  	DBUG_RETURN(0);
}

/*********************************************************************
Intentionally empty: this function does nothing. */

void
ha_innobase::initialize(void)
/*=========================*/
{
	/* nothing to do */
}
1521 1522

/**********************************************************************
1523
Closes a handle to an InnoDB table. */
1524 1525 1526 1527 1528 1529 1530 1531 1532 1533

int
ha_innobase::close(void)
/*====================*/
				/* out: error number */
{
  	DBUG_ENTER("ha_innobase::close");

	row_prebuilt_free((row_prebuilt_t*) innobase_prebuilt);

1534
    	my_free((char*) upd_buff, MYF(0));
1535 1536
        free_share(share);

1537
	/* Tell InnoDB server that there might be work for
1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553
	utility threads: */

	srv_active_wake_master_thread();

  	DBUG_RETURN(0);
}

/* The following accessor functions should really be inside MySQL code! */

/******************************************************************
Gets the offset of a field in a table: the distance in bytes from the
start of table->record[0] to field->ptr. */
inline
uint
get_field_offset(
/*=============*/
			/* out: offset */
	TABLE*	table,	/* in: MySQL table object */
	Field*	field)	/* in: MySQL field object */
{
	char*	row_start = (char*) table->record[0];

	return (uint) (field->ptr - row_start);
}

/******************************************************************
Checks if a field in a record is SQL NULL. The position of the null bit
within the record is derived from the record format information in table:
it is at the same offset as field->null_ptr is within table->record[0]. */
inline
uint
field_in_record_is_null(
/*====================*/
			/* out: 1 if NULL, 0 otherwise */
	TABLE*	table,	/* in: MySQL table object */
	Field*	field,	/* in: MySQL field object */
	char*	record)	/* in: a row in MySQL format */
{
	int	null_offset;

	if (field->null_ptr == NULL) {
		/* The field has no null bit: it is reported as
		non-NULL */

		return(0);
	}

	null_offset = (uint) ((char*) field->null_ptr
					- (char*) table->record[0]);

	return((record[null_offset] & field->null_bit) ? 1 : 0);
}

/******************************************************************
Sets a field in a record to SQL NULL. Uses the record format
information in table to track the null bit in record. */
inline
void
set_field_in_record_to_null(
/*========================*/
1597
	TABLE*	table,	/* in: MySQL table object */
1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614
	Field*	field,	/* in: MySQL field object */
	char*	record)	/* in: a row in MySQL format */
{
	int	null_offset;

	null_offset = (uint) ((char*) field->null_ptr
					- (char*) table->record[0]);

	record[null_offset] = record[null_offset] | field->null_bit;
}

/******************************************************************
Resets SQL NULL bits in a record to zero. */
inline
void
reset_null_bits(
/*============*/
1615
	TABLE*	table,	/* in: MySQL table object */
1616 1617 1618 1619 1620 1621 1622
	char*	record)	/* in: a row in MySQL format */
{
	bzero(record, table->null_bytes);
}

extern "C" {
/*****************************************************************
1623
InnoDB uses this function is to compare two data fields for which the
1624
data type is such that we must use MySQL code to compare them. NOTE that the
1625
prototype of this function is in rem0cmp.c in InnoDB source code!
1626
If you change this function, remember to update the prototype there! */
1627 1628 1629

int
innobase_mysql_cmp(
1630
/*===============*/
1631 1632
					/* out: 1, 0, -1, if a is greater,
					equal, less than b, respectively */
1633
	int		mysql_type,	/* in: MySQL type */
1634 1635 1636 1637 1638 1639 1640 1641
	unsigned char*	a,		/* in: data field */
	unsigned int	a_length,	/* in: data field length,
					not UNIV_SQL_NULL */
	unsigned char*	b,		/* in: data field */
	unsigned int	b_length)	/* in: data field length,
					not UNIV_SQL_NULL */
{
	enum_field_types	mysql_tp;
1642
	int                     ret;
1643

unknown's avatar
unknown committed
1644 1645
	DBUG_ASSERT(a_length != UNIV_SQL_NULL);
	DBUG_ASSERT(b_length != UNIV_SQL_NULL);
1646 1647 1648 1649 1650 1651 1652

	mysql_tp = (enum_field_types) mysql_type;

	switch (mysql_tp) {

	case FIELD_TYPE_STRING:
	case FIELD_TYPE_VAR_STRING:
unknown's avatar
unknown committed
1653 1654 1655 1656
	case FIELD_TYPE_TINY_BLOB:
	case FIELD_TYPE_MEDIUM_BLOB:
	case FIELD_TYPE_BLOB:
	case FIELD_TYPE_LONG_BLOB:
1657 1658 1659
  		ret = my_sortncmp((const char*) a, a_length,
				  (const char*) b, b_length);
		if (ret < 0) {
1660
		        return(-1);
1661
		} else if (ret > 0) {
1662
		        return(1);
1663
		} else {
1664
		        return(0);
1665
	        }
1666 1667 1668 1669 1670 1671 1672 1673 1674
	default:
		assert(0);
	}

	return(0);
}
}

/******************************************************************
1675
Converts a MySQL type to an InnoDB type. */
1676 1677
inline
ulint
1678 1679 1680
get_innobase_type_from_mysql_type(
/*==============================*/
			/* out: DATA_BINARY, DATA_VARCHAR, ... */
1681 1682
	Field*	field)	/* in: MySQL field */
{
unknown's avatar
unknown committed
1683
	/* The following asserts check that the MySQL type code fits in
1684 1685
	8 bits: this is used in ibuf and also when DATA_NOT_NULL is
	ORed to the type */
1686

unknown's avatar
unknown committed
1687 1688 1689 1690 1691
	DBUG_ASSERT((ulint)FIELD_TYPE_STRING < 256);
	DBUG_ASSERT((ulint)FIELD_TYPE_VAR_STRING < 256);
	DBUG_ASSERT((ulint)FIELD_TYPE_DOUBLE < 256);
	DBUG_ASSERT((ulint)FIELD_TYPE_FLOAT < 256);
	DBUG_ASSERT((ulint)FIELD_TYPE_DECIMAL < 256);
1692 1693

	switch (field->type()) {
unknown's avatar
unknown committed
1694 1695
	        /* NOTE that we only allow string types in DATA_MYSQL
		and DATA_VARMYSQL */
1696
		case FIELD_TYPE_VAR_STRING: if (field->flags & BINARY_FLAG) {
1697 1698 1699 1700 1701 1702

						return(DATA_BINARY);
					} else if (strcmp(
						   default_charset_info->name,
							"latin1") == 0) {
						return(DATA_VARCHAR);
1703 1704
					} else {
						return(DATA_VARMYSQL);
1705
					}
1706 1707 1708 1709 1710 1711 1712
		case FIELD_TYPE_STRING: if (field->flags & BINARY_FLAG) {

						return(DATA_FIXBINARY);
					} else if (strcmp(
						   default_charset_info->name,
							"latin1") == 0) {
						return(DATA_CHAR);
1713 1714
					} else {
						return(DATA_MYSQL);
1715
					}
1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726
		case FIELD_TYPE_LONG:
		case FIELD_TYPE_LONGLONG:
		case FIELD_TYPE_TINY:
		case FIELD_TYPE_SHORT:
		case FIELD_TYPE_INT24:
		case FIELD_TYPE_DATE:
		case FIELD_TYPE_DATETIME:
		case FIELD_TYPE_YEAR:
		case FIELD_TYPE_NEWDATE:
		case FIELD_TYPE_ENUM:
		case FIELD_TYPE_SET:
1727 1728 1729
		case FIELD_TYPE_TIME:
		case FIELD_TYPE_TIMESTAMP:
					return(DATA_INT);
1730
		case FIELD_TYPE_FLOAT:
1731
					return(DATA_FLOAT);
1732
		case FIELD_TYPE_DOUBLE:
1733
					return(DATA_DOUBLE);
1734
		case FIELD_TYPE_DECIMAL:
1735 1736 1737 1738 1739 1740
					return(DATA_DECIMAL);
		case FIELD_TYPE_TINY_BLOB:
		case FIELD_TYPE_MEDIUM_BLOB:
		case FIELD_TYPE_BLOB:
		case FIELD_TYPE_LONG_BLOB:
					return(DATA_BLOB);
1741 1742 1743 1744 1745 1746
		default:
					assert(0);
	}

	return(0);
}
1747

1748
/***********************************************************************
1749
Stores a key value for a row to a buffer. */
1750 1751 1752 1753 1754 1755 1756

uint
ha_innobase::store_key_val_for_row(
/*===============================*/
				/* out: key value length as stored in buff */
	uint 		keynr,	/* in: key number */
	char*		buff,	/* in/out: buffer for the key value (in MySQL
1757 1758
				format) */
	uint		buff_len,/* in: buffer length */
1759
	const mysql_byte* record)/* in: row in MySQL format */
1760 1761 1762 1763 1764
{
	KEY*		key_info 	= table->key_info + keynr;
  	KEY_PART_INFO*	key_part	= key_info->key_part;
  	KEY_PART_INFO*	end		= key_part + key_info->key_parts;
	char*		buff_start	= buff;
unknown's avatar
unknown committed
1765 1766 1767 1768 1769
	enum_field_types mysql_type;
	Field*		field;
	ulint		blob_len;
	byte*		blob_data;
	ibool		is_null;
1770

1771 1772
  	DBUG_ENTER("store_key_val_for_row");

unknown's avatar
unknown committed
1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788
	/* The format for storing a key field in MySQL is the following:

	1. If the column can be NULL, then in the first byte we put 1 if the
	field value is NULL, 0 otherwise.

	2. If the column is of a BLOB type (it must be a column prefix field
	in this case), then we put the length of the data in the field to the
	next 2 bytes, in the little-endian format. If the field is SQL NULL,
	then these 2 bytes are set to 0. Note that the length of data in the
	field is <= column prefix length.

	3. In a column prefix field, prefix_len next bytes are reserved for
	data. In a normal field the max field length next bytes are reserved
	for data. For a VARCHAR(n) the max field length is n. If the stored
	value is the SQL NULL then these data bytes are set to 0. */	

1789 1790 1791
	/* We have to zero-fill the buffer so that MySQL is able to use a
	simple memcmp to compare two key values to determine if they are
	equal. MySQL does this to compare contents of two 'ref' values. */
unknown's avatar
unknown committed
1792

1793
	bzero(buff, buff_len);
unknown's avatar
unknown committed
1794

1795
  	for (; key_part != end; key_part++) {
unknown's avatar
unknown committed
1796
	        is_null = FALSE;
1797 1798 1799 1800

    		if (key_part->null_bit) {
      			if (record[key_part->null_offset]
						& key_part->null_bit) {
unknown's avatar
unknown committed
1801 1802 1803 1804 1805 1806
				*buff = 1;
				is_null = TRUE;
      			} else {
				*buff = 0;
			}
			buff++;
1807
    		}
1808

unknown's avatar
unknown committed
1809 1810 1811 1812 1813 1814 1815
		field = key_part->field;
		mysql_type = field->type();

		if (mysql_type == FIELD_TYPE_TINY_BLOB
		    || mysql_type == FIELD_TYPE_MEDIUM_BLOB
		    || mysql_type == FIELD_TYPE_BLOB
		    || mysql_type == FIELD_TYPE_LONG_BLOB) {
1816

1817
			ut_a(key_part->key_part_flag & HA_PART_KEY_SEG);
unknown's avatar
unknown committed
1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839

		        if (is_null) {
				 buff += key_part->length + 2;
				 
				 continue;
			}
		    
		        blob_data = row_mysql_read_blob_ref(&blob_len,
				(byte*) (record
				+ (ulint)get_field_offset(table, field)),
					(ulint) field->pack_length());

			ut_a(get_field_offset(table, field)
						     == key_part->offset);
			if (blob_len > key_part->length) {
			        blob_len = key_part->length;
			}

			/* MySQL reserves 2 bytes for the length and the
			storage of the number is little-endian */

			ut_a(blob_len < 256);
1840
			*((byte*)buff) = (byte)blob_len;
unknown's avatar
unknown committed
1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856
			buff += 2;

			memcpy(buff, blob_data, blob_len);

			buff += key_part->length;
		} else {
		        if (is_null) {
				 buff += key_part->length;
				 
				 continue;
			}
			memcpy(buff, record + key_part->offset,
							key_part->length);
			buff += key_part->length;
		}
  	}
unknown's avatar
unknown committed
1857

1858 1859
	ut_a(buff <= buff_start + buff_len);

unknown's avatar
unknown committed
1860
	DBUG_RETURN((uint)(buff - buff_start));
1861 1862 1863
}

/******************************************************************
1864
Builds a template to the prebuilt struct. */
unknown's avatar
unknown committed
1865
static
1866
void
1867 1868 1869 1870 1871 1872 1873 1874 1875
build_template(
/*===========*/
	row_prebuilt_t*	prebuilt,	/* in: prebuilt struct */
	THD*		thd,		/* in: current user thread, used
					only if templ_type is
					ROW_MYSQL_REC_FIELDS */
	TABLE*		table,		/* in: MySQL table */
	ulint		templ_type)	/* in: ROW_MYSQL_WHOLE_ROW or
					ROW_MYSQL_REC_FIELDS */
1876
{
1877 1878
	dict_index_t*	index;
	dict_index_t*	clust_index;
1879
	mysql_row_templ_t* templ;
1880
	Field*		field;
1881 1882
	ulint		n_fields;
	ulint		n_requested_fields	= 0;
unknown's avatar
Merge  
unknown committed
1883
	ibool		fetch_all_in_key	= FALSE;
1884
	ulint		i;
1885

1886
	clust_index = dict_table_get_first_index_noninline(prebuilt->table);
1887

unknown's avatar
unknown committed
1888 1889 1890 1891
	if (!prebuilt->hint_no_need_to_fetch_extra_cols) {
		/* We have a hint that we should at least fetch all
		columns in the key, or all columns in the table */

unknown's avatar
Merge  
unknown committed
1892
		if (prebuilt->read_just_key) {
unknown's avatar
unknown committed
1893
			/* MySQL has instructed us that it is enough to
1894 1895 1896 1897 1898
			fetch the columns in the key; looks like MySQL
			can set this flag also when there is only a
			prefix of the column in the key: in that case we
			retrieve the whole column from the clustered
			index */
unknown's avatar
unknown committed
1899

unknown's avatar
Merge  
unknown committed
1900 1901 1902
			fetch_all_in_key = TRUE;
		} else {
			/* We are building a temporary table: fetch all
unknown's avatar
unknown committed
1903 1904 1905 1906 1907
 			columns; the reason is that MySQL may use the
			clustered index key to store rows, but the mechanism
			we use below to detect required columns does not
			reveal that. Actually, it might be enough to
			fetch only all in the key also in this case! */
unknown's avatar
unknown committed
1908

unknown's avatar
Merge  
unknown committed
1909 1910
			templ_type = ROW_MYSQL_WHOLE_ROW;
		}
1911 1912
	}

unknown's avatar
unknown committed
1913
	if (prebuilt->select_lock_type == LOCK_X) {
unknown's avatar
unknown committed
1914 1915 1916
		/* We always retrieve the whole clustered index record if we
		use exclusive row level locks, for example, if the read is
		done in an UPDATE statement. */
unknown's avatar
unknown committed
1917 1918 1919 1920

	        templ_type = ROW_MYSQL_WHOLE_ROW;
	}

1921
	if (templ_type == ROW_MYSQL_REC_FIELDS) {
unknown's avatar
unknown committed
1922 1923 1924 1925 1926
		/* In versions < 3.23.50 we always retrieved the clustered
		index record if prebuilt->select_lock_type == LOCK_S,
		but there is really not need for that, and in some cases
		performance could be seriously degraded because the MySQL
		optimizer did not know about our convention! */
1927

unknown's avatar
unknown committed
1928
		index = prebuilt->index;
1929 1930
	} else {
		index = clust_index;
1931
	}
1932

1933 1934 1935 1936 1937 1938 1939
	if (index == clust_index) {
		prebuilt->need_to_access_clustered = TRUE;
	} else {
		prebuilt->need_to_access_clustered = FALSE;
		/* Below we check column by column if we need to access
		the clustered index */
	}
1940

1941 1942 1943 1944 1945 1946 1947
	n_fields = (ulint)table->fields;

	if (!prebuilt->mysql_template) {
		prebuilt->mysql_template = (mysql_row_templ_t*)
						mem_alloc_noninline(
					n_fields * sizeof(mysql_row_templ_t));
	}
1948

1949 1950
	prebuilt->template_type = templ_type;
	prebuilt->null_bitmap_len = table->null_bytes;
1951

1952 1953
	prebuilt->templ_contains_blob = FALSE;

1954
	for (i = 0; i < n_fields; i++) {
1955
		templ = prebuilt->mysql_template + n_requested_fields;
1956 1957
		field = table->field[i];

1958
		if (templ_type == ROW_MYSQL_REC_FIELDS
1959 1960
			&& !(fetch_all_in_key
			     && dict_index_contains_col_or_prefix(index, i))
1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971
			&& thd->query_id != field->query_id
			&& thd->query_id != (field->query_id ^ MAX_ULONG_BIT)
			&& thd->query_id !=
				(field->query_id ^ (MAX_ULONG_BIT >> 1))) {

			/* This field is not needed in the query, skip it */

			goto skip_field;
		}

		n_requested_fields++;
1972

1973
		templ->col_no = i;
1974

1975 1976 1977
		if (index == clust_index) {
			templ->rec_field_no = (index->table->cols + i)
								->clust_pos;
1978
		} else {
1979 1980
			templ->rec_field_no = dict_index_get_nth_col_pos(
								index, i);
1981 1982
		}

1983 1984 1985 1986 1987 1988 1989 1990
		if (templ->rec_field_no == ULINT_UNDEFINED) {
			prebuilt->need_to_access_clustered = TRUE;
		}

		if (field->null_ptr) {
			templ->mysql_null_byte_offset =
				(ulint) ((char*) field->null_ptr
					- (char*) table->record[0]);
1991

1992 1993 1994 1995
			templ->mysql_null_bit_mask = (ulint) field->null_bit;
		} else {
			templ->mysql_null_bit_mask = 0;
		}
1996

1997 1998
		templ->mysql_col_offset = (ulint)
					get_field_offset(table, field);
1999

2000 2001 2002
		templ->mysql_col_len = (ulint) field->pack_length();
		templ->type = get_innobase_type_from_mysql_type(field);
		templ->is_unsigned = (ulint) (field->flags & UNSIGNED_FLAG);
2003

2004 2005
		if (templ->type == DATA_BLOB) {
			prebuilt->templ_contains_blob = TRUE;
2006
		}
2007 2008 2009
skip_field:
		;
	}
2010

2011
	prebuilt->n_template = n_requested_fields;
2012

2013 2014 2015 2016 2017
	if (prebuilt->need_to_access_clustered) {
		/* Change rec_field_no's to correspond to the clustered index
		record */
		for (i = 0; i < n_requested_fields; i++) {
			templ = prebuilt->mysql_template + i;
2018

2019 2020 2021
			templ->rec_field_no =
			    (index->table->cols + templ->col_no)->clust_pos;
		}
2022
	}
2023 2024 2025
}

/************************************************************************
2026
Stores a row in an InnoDB database, to the table specified in this
2027 2028 2029 2030 2031
handle. */

int
ha_innobase::write_row(
/*===================*/
2032 2033
				/* out: error code */
	mysql_byte* 	record)	/* in: a row in MySQL format */
2034
{
2035
	row_prebuilt_t* prebuilt = (row_prebuilt_t*)innobase_prebuilt;
2036
  	int 		error;
2037
	longlong	auto_inc;
unknown's avatar
unknown committed
2038
	longlong	dummy;
2039 2040
	ibool           incremented_auto_inc_for_stat = FALSE;
	ibool           incremented_auto_inc_counter = FALSE;
unknown's avatar
unknown committed
2041
	ibool           skip_auto_inc_decr;
unknown's avatar
unknown committed
2042

2043
  	DBUG_ENTER("ha_innobase::write_row");
2044

unknown's avatar
unknown committed
2045 2046 2047 2048
	if (prebuilt->trx !=
			(trx_t*) current_thd->transaction.all.innobase_tid) {
		fprintf(stderr,
"InnoDB: Error: the transaction object for the table handle is at\n"
2049 2050 2051 2052 2053 2054 2055 2056 2057
"InnoDB: %p, but for the current thread it is at %p\n",
			prebuilt->trx,
			current_thd->transaction.all.innobase_tid);
		fputs("InnoDB: Dump of 200 bytes around prebuilt: ", stderr);
		ut_print_buf(stderr, ((const byte*)prebuilt) - 100, 200);
		fputs("\n"
			"InnoDB: Dump of 200 bytes around transaction.all: ",
			stderr);
		ut_print_buf(stderr,
unknown's avatar
unknown committed
2058
			((byte*)(&(current_thd->transaction.all))) - 100, 200);
2059 2060
		putc('\n', stderr);
		ut_error;
unknown's avatar
unknown committed
2061
	}
unknown's avatar
unknown committed
2062

2063 2064 2065 2066 2067
  	statistic_increment(ha_write_count, &LOCK_status);

  	if (table->time_stamp) {
    		update_timestamp(record + table->time_stamp - 1);
    	}
2068

unknown's avatar
unknown committed
2069 2070 2071
	if (last_query_id != user_thd->query_id) {
	        prebuilt->sql_stat_start = TRUE;
                last_query_id = user_thd->query_id;
unknown's avatar
unknown committed
2072 2073

		innobase_release_stat_resources(prebuilt->trx);
unknown's avatar
unknown committed
2074 2075
	}

2076
  	if (table->next_number_field && record == table->record[0]) {
unknown's avatar
unknown committed
2077 2078
		/* This is the case where the table has an
		auto-increment column */
unknown's avatar
unknown committed
2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103

		/* Initialize the auto-inc counter if it has not been
		initialized yet */

		if (0 == dict_table_autoinc_peek(prebuilt->table)) {

			/* This call initializes the counter */
		        error = innobase_read_and_init_auto_inc(&dummy);

			if (error) {
				/* Deadlock or lock wait timeout */

				goto func_exit;
			}

			/* We have to set sql_stat_start to TRUE because
			the above call probably has called a select, and
			has reset that flag; row_insert_for_mysql has to
			know to set the IX intention lock on the table,
			something it only does at the start of each
			statement */

			prebuilt->sql_stat_start = TRUE;
		}

2104 2105
	        /* Fetch the value the user possibly has set in the
	        autoincrement field */
unknown's avatar
unknown committed
2106

2107 2108
	        auto_inc = table->next_number_field->val_int();

2109 2110 2111
		/* In replication and also otherwise the auto-inc column 
		can be set with SET INSERT_ID. Then we must look at
		user_thd->next_insert_id. If it is nonzero and the user
unknown's avatar
Merge  
unknown committed
2112 2113 2114
		has not supplied a value, we must use it, and use values
		incremented by 1 in all subsequent inserts within the
		same SQL statement! */
2115 2116 2117

		if (auto_inc == 0 && user_thd->next_insert_id != 0) {
		        auto_inc = user_thd->next_insert_id;
unknown's avatar
Merge  
unknown committed
2118
		        auto_inc_counter_for_this_stat = auto_inc;
2119
		}
2120

unknown's avatar
Merge  
unknown committed
2121 2122 2123 2124 2125 2126
		if (auto_inc == 0 && auto_inc_counter_for_this_stat) {
			/* The user set the auto-inc counter for
			this SQL statement with SET INSERT_ID. We must
			assign sequential values from the counter. */

			auto_inc_counter_for_this_stat++;
2127
			incremented_auto_inc_for_stat = TRUE;
unknown's avatar
Merge  
unknown committed
2128 2129 2130 2131 2132 2133 2134

			auto_inc = auto_inc_counter_for_this_stat;

			/* We give MySQL a new value to place in the
			auto-inc column */
			user_thd->next_insert_id = auto_inc;
		}
unknown's avatar
unknown committed
2135

2136
		if (auto_inc != 0) {
unknown's avatar
unknown committed
2137 2138 2139
			/* This call will calculate the max of the current
			value and the value supplied by the user and
			update the counter accordingly */
2140 2141 2142 2143 2144 2145 2146 2147

			/* We have to use the transactional lock mechanism
			on the auto-inc counter of the table to ensure
			that replication and roll-forward of the binlog
			exactly imitates also the given auto-inc values.
			The lock is released at each SQL statement's
			end. */

unknown's avatar
unknown committed
2148
			innodb_srv_conc_enter_innodb(prebuilt->trx);
2149
			error = row_lock_table_autoinc_for_mysql(prebuilt);
unknown's avatar
unknown committed
2150
			innodb_srv_conc_exit_innodb(prebuilt->trx);
2151 2152

			if (error != DB_SUCCESS) {
unknown's avatar
unknown committed
2153

unknown's avatar
unknown committed
2154
				error = convert_error_code_to_mysql(error,
unknown's avatar
unknown committed
2155
								    user_thd);
2156 2157
				goto func_exit;
			}	
unknown's avatar
unknown committed
2158

2159 2160
			dict_table_autoinc_update(prebuilt->table, auto_inc);
		} else {
unknown's avatar
unknown committed
2161
			innodb_srv_conc_enter_innodb(prebuilt->trx);
unknown's avatar
Merge  
unknown committed
2162

2163 2164 2165 2166 2167
			if (!prebuilt->trx->auto_inc_lock) {

				error = row_lock_table_autoinc_for_mysql(
								prebuilt);
				if (error != DB_SUCCESS) {
unknown's avatar
unknown committed
2168 2169
 					innodb_srv_conc_exit_innodb(
							prebuilt->trx);
unknown's avatar
unknown committed
2170

2171
					error = convert_error_code_to_mysql(
unknown's avatar
unknown committed
2172
							error, user_thd);
2173 2174 2175 2176
					goto func_exit;
				}
			}	

2177 2178 2179
			/* The following call gets the value of the auto-inc
			counter of the table and increments it by 1 */

2180
			auto_inc = dict_table_autoinc_get(prebuilt->table);
2181 2182
			incremented_auto_inc_counter = TRUE;

unknown's avatar
unknown committed
2183
			innodb_srv_conc_exit_innodb(prebuilt->trx);
2184

unknown's avatar
unknown committed
2185 2186
			/* We can give the new value for MySQL to place in
			the field */
2187

unknown's avatar
unknown committed
2188
			user_thd->next_insert_id = auto_inc;
2189
		}
unknown's avatar
unknown committed
2190

unknown's avatar
unknown committed
2191 2192 2193
		/* This call of a handler.cc function places
		user_thd->next_insert_id to the column value, if the column
		value was not set by the user */
2194

unknown's avatar
unknown committed
2195 2196
    		update_auto_increment();
	}
2197

2198 2199 2200 2201
	if (prebuilt->mysql_template == NULL
			|| prebuilt->template_type != ROW_MYSQL_WHOLE_ROW) {
		/* Build the template used in converting quickly between
		the two database formats */
2202

2203 2204
		build_template(prebuilt, NULL, table, ROW_MYSQL_WHOLE_ROW);
	}
2205

unknown's avatar
unknown committed
2206
	innodb_srv_conc_enter_innodb(prebuilt->trx);
unknown's avatar
Merge  
unknown committed
2207

2208
	error = row_insert_for_mysql((byte*) record, prebuilt);
2209

unknown's avatar
unknown committed
2210
	innodb_srv_conc_exit_innodb(prebuilt->trx);
unknown's avatar
Merge  
unknown committed
2211

2212 2213 2214
	if (error != DB_SUCCESS) {
	        /* If the insert did not succeed we restore the value of
		the auto-inc counter we used; note that this behavior was
unknown's avatar
unknown committed
2215 2216 2217
		introduced only in version 4.0.4.
		NOTE that a REPLACE command handles a duplicate key error
		itself, and we must not decrement the autoinc counter
unknown's avatar
unknown committed
2218 2219 2220 2221 2222 2223
		if we are performing a REPLACE statement.
		NOTE 2: if there was an error, for example a deadlock,
		which caused InnoDB to roll back the whole transaction
		already in the call of row_insert_for_mysql(), we may no
		longer have the AUTO-INC lock, and cannot decrement
		the counter here. */
unknown's avatar
unknown committed
2224 2225

	        skip_auto_inc_decr = FALSE;
unknown's avatar
unknown committed
2226

unknown's avatar
unknown committed
2227 2228 2229 2230
	        if (error == DB_DUPLICATE_KEY
		    && (user_thd->lex.sql_command == SQLCOM_REPLACE
			|| user_thd->lex.sql_command
			                 == SQLCOM_REPLACE_SELECT)) {
unknown's avatar
unknown committed
2231 2232 2233

		        skip_auto_inc_decr= TRUE;
		}
2234

unknown's avatar
unknown committed
2235 2236
	        if (!skip_auto_inc_decr && incremented_auto_inc_counter
		    && prebuilt->trx->auto_inc_lock) {
unknown's avatar
unknown committed
2237
	                dict_table_autoinc_decrement(prebuilt->table);
2238 2239
	        }

unknown's avatar
unknown committed
2240 2241
		if (!skip_auto_inc_decr && incremented_auto_inc_for_stat
		    && prebuilt->trx->auto_inc_lock) {
2242 2243 2244 2245
		        auto_inc_counter_for_this_stat--;
		}
	}

unknown's avatar
unknown committed
2246
	error = convert_error_code_to_mysql(error, user_thd);
2247

2248
	/* Tell InnoDB server that there might be work for
2249
	utility threads: */
2250
func_exit:
2251
	innobase_active_small();
2252 2253 2254 2255

  	DBUG_RETURN(error);
}

2256
/******************************************************************
2257
Converts field data for storage in an InnoDB update vector. */
2258 2259 2260 2261 2262 2263 2264 2265 2266 2267
inline
mysql_byte*
innobase_convert_and_store_changed_col(
/*===================================*/
				/* out: pointer to the end of the converted
				data in the buffer */
	upd_field_t*	ufield,	/* in/out: field in the update vector */
	mysql_byte*	buf,	/* in: buffer we can use in conversion */
	mysql_byte*	data,	/* in: column data to store */
	ulint		len,	/* in: data len */
2268
	ulint		col_type,/* in: data type in InnoDB type numbers */
2269
	ulint		is_unsigned)/* in: != 0 if an unsigned integer type */
2270
{
2271 2272 2273 2274
	uint	i;

	if (len == UNIV_SQL_NULL) {
		data = NULL;
2275 2276
	} else if (col_type == DATA_VARCHAR || col_type == DATA_BINARY
		   || col_type == DATA_VARMYSQL) {
2277 2278 2279 2280
	        /* Remove trailing spaces */
        	while (len > 0 && data[len - 1] == ' ') {
	                len--;
	        }
2281
	} else if (col_type == DATA_INT) {
2282
		/* Store integer data in InnoDB in a big-endian
2283
		format, sign bit negated, if signed */
2284

2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295
		for (i = 0; i < len; i++) {
			buf[len - 1 - i] = data[i];
		}

		if (!is_unsigned) {
			buf[0] = buf[0] ^ 128;
		}

		data = buf;

		buf += len;
2296
	}
2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314

	ufield->new_val.data = data;
	ufield->new_val.len = len;

	return(buf);
}

/**************************************************************************
Checks which fields have changed in a row and stores information
of them to an update vector. */
static
int
calc_row_difference(
/*================*/
					/* out: error number or 0 */
	upd_t*		uvect,		/* in/out: update vector */
	mysql_byte* 	old_row,	/* in: old row in MySQL format */
	mysql_byte* 	new_row,	/* in: new row in MySQL format */
unknown's avatar
unknown committed
2315 2316
	struct st_table* table,		/* in: table in MySQL data
					dictionary */
2317
	mysql_byte*	upd_buff,	/* in: buffer to use */
unknown's avatar
unknown committed
2318
	ulint		buff_len,	/* in: buffer length */
2319
	row_prebuilt_t*	prebuilt,	/* in: InnoDB prebuilt struct */
2320 2321
	THD*		thd)		/* in: user thread */
{
unknown's avatar
unknown committed
2322
	mysql_byte*	original_upd_buff = upd_buff;
2323
	Field*		field;
2324 2325 2326
	uint		n_fields;
	ulint		o_len;
	ulint		n_len;
unknown's avatar
unknown committed
2327 2328 2329
	byte*	        o_ptr;
        byte*	        n_ptr;
        byte*	        buf;
2330
	upd_field_t*	ufield;
2331 2332
	ulint		col_type;
	ulint		is_unsigned;
2333
	ulint		n_changed = 0;
2334
	uint		i;
2335 2336 2337

	n_fields = table->fields;

2338
	/* We use upd_buff to convert changed fields */
unknown's avatar
unknown committed
2339
	buf = (byte*) upd_buff;
2340

2341 2342 2343
	for (i = 0; i < n_fields; i++) {
		field = table->field[i];

2344
		/* if (thd->query_id != field->query_id) { */
2345 2346
			/* TODO: check that these fields cannot have
			changed! */
2347

2348 2349
		/*	goto skip_field;
		}*/
2350

unknown's avatar
unknown committed
2351 2352
		o_ptr = (byte*) old_row + get_field_offset(table, field);
		n_ptr = (byte*) new_row + get_field_offset(table, field);
2353 2354 2355
		o_len = field->pack_length();
		n_len = field->pack_length();

2356
		col_type = get_innobase_type_from_mysql_type(field);
2357
		is_unsigned = (ulint) (field->flags & UNSIGNED_FLAG);
2358 2359 2360 2361 2362 2363 2364 2365 2366 2367

		switch (col_type) {

		case DATA_BLOB:
			o_ptr = row_mysql_read_blob_ref(&o_len, o_ptr, o_len);
			n_ptr = row_mysql_read_blob_ref(&n_len, n_ptr, n_len);
			break;
		case DATA_VARCHAR:
		case DATA_BINARY:
		case DATA_VARMYSQL:
unknown's avatar
unknown committed
2368 2369 2370 2371
			o_ptr = row_mysql_read_var_ref_noninline(&o_len,
								o_ptr);
			n_ptr = row_mysql_read_var_ref_noninline(&n_len,
								n_ptr);
2372 2373 2374
		default:
			;
		}
2375

2376 2377 2378 2379 2380
		if (field->null_ptr) {
			if (field_in_record_is_null(table, field,
							(char*) old_row)) {
				o_len = UNIV_SQL_NULL;
			}
2381

2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393
			if (field_in_record_is_null(table, field,
							(char*) new_row)) {
				n_len = UNIV_SQL_NULL;
			}
		}

		if (o_len != n_len || (o_len != UNIV_SQL_NULL &&
					0 != memcmp(o_ptr, n_ptr, o_len))) {
			/* The field has changed */

			ufield = uvect->fields + n_changed;

unknown's avatar
unknown committed
2394 2395 2396 2397
			buf = (byte*)
                          innobase_convert_and_store_changed_col(ufield,
					  (mysql_byte*)buf,
					  (mysql_byte*)n_ptr, n_len, col_type,
2398
						is_unsigned);
2399
			ufield->exp = NULL;
2400 2401
			ufield->field_no =
					(prebuilt->table->cols + i)->clust_pos;
2402 2403 2404 2405 2406 2407 2408
			n_changed++;
		}
	}

	uvect->n_fields = n_changed;
	uvect->info_bits = 0;

unknown's avatar
unknown committed
2409 2410
	ut_a(buf <= (byte*)original_upd_buff + buff_len);

2411 2412 2413 2414 2415 2416 2417
	return(0);
}

/**************************************************************************
Updates a row given as a parameter to a new value. Note that we are given
whole rows, not just the fields which are updated: this incurs some
overhead for CPU when we check which fields are actually updated.
2418
TODO: currently InnoDB does not prevent the 'Halloween problem':
2419 2420
in a searched update a single row can get updated several times
if its index columns are updated! */
2421

2422 2423 2424 2425
int
ha_innobase::update_row(
/*====================*/
					/* out: error number or 0 */
2426 2427
	const mysql_byte* 	old_row,/* in: old row in MySQL format */
	mysql_byte* 		new_row)/* in: new row in MySQL format */
2428 2429 2430 2431 2432
{
	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;
	upd_t*		uvect;
	int		error = 0;

2433
	DBUG_ENTER("ha_innobase::update_row");
2434

unknown's avatar
unknown committed
2435 2436 2437
	ut_a(prebuilt->trx ==
		(trx_t*) current_thd->transaction.all.innobase_tid);

2438 2439 2440 2441
        if (table->time_stamp) {
                update_timestamp(new_row + table->time_stamp - 1);
	}

unknown's avatar
unknown committed
2442 2443 2444
	if (last_query_id != user_thd->query_id) {
	        prebuilt->sql_stat_start = TRUE;
                last_query_id = user_thd->query_id;
unknown's avatar
unknown committed
2445 2446

		innobase_release_stat_resources(prebuilt->trx);
unknown's avatar
unknown committed
2447 2448
	}

2449 2450 2451 2452 2453
	if (prebuilt->upd_node) {
		uvect = prebuilt->upd_node->update;
	} else {
		uvect = row_get_prebuilt_update_vector(prebuilt);
	}
2454 2455 2456 2457

	/* Build an update vector from the modified fields in the rows
	(uses upd_buff of the handle) */

2458
	calc_row_difference(uvect, (mysql_byte*) old_row, new_row, table,
unknown's avatar
unknown committed
2459 2460 2461
			upd_buff, (ulint)upd_and_key_val_buff_len,
			prebuilt, user_thd);

2462 2463 2464
	/* This is not a delete */
	prebuilt->upd_node->is_delete = FALSE;

unknown's avatar
unknown committed
2465
	assert(prebuilt->template_type == ROW_MYSQL_WHOLE_ROW);
2466

unknown's avatar
unknown committed
2467
	innodb_srv_conc_enter_innodb(prebuilt->trx);
unknown's avatar
Merge  
unknown committed
2468

2469
	error = row_update_for_mysql((byte*) old_row, prebuilt);
2470

unknown's avatar
unknown committed
2471
	innodb_srv_conc_exit_innodb(prebuilt->trx);
unknown's avatar
Merge  
unknown committed
2472

unknown's avatar
unknown committed
2473
	error = convert_error_code_to_mysql(error, user_thd);
2474

2475
	/* Tell InnoDB server that there might be work for
2476 2477
	utility threads: */

2478
	innobase_active_small();
2479 2480 2481 2482 2483 2484 2485 2486 2487 2488

	DBUG_RETURN(error);
}

/**************************************************************************
Deletes a row given as the parameter. */

int
ha_innobase::delete_row(
/*====================*/
2489 2490
					/* out: error number or 0 */
	const mysql_byte* record)	/* in: a row in MySQL format */
2491 2492 2493 2494
{
	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;
	int		error = 0;

2495
	DBUG_ENTER("ha_innobase::delete_row");
2496

unknown's avatar
unknown committed
2497 2498 2499
	ut_a(prebuilt->trx ==
		(trx_t*) current_thd->transaction.all.innobase_tid);

unknown's avatar
unknown committed
2500 2501 2502
	if (last_query_id != user_thd->query_id) {
	        prebuilt->sql_stat_start = TRUE;
                last_query_id = user_thd->query_id;
unknown's avatar
unknown committed
2503 2504

		innobase_release_stat_resources(prebuilt->trx);
unknown's avatar
unknown committed
2505 2506
	}

2507 2508 2509
	if (!prebuilt->upd_node) {
		row_get_prebuilt_update_vector(prebuilt);
	}
2510 2511

	/* This is a delete */
2512

2513
	prebuilt->upd_node->is_delete = TRUE;
2514

unknown's avatar
unknown committed
2515
	innodb_srv_conc_enter_innodb(prebuilt->trx);
unknown's avatar
Merge  
unknown committed
2516

2517
	error = row_update_for_mysql((byte*) record, prebuilt);
2518

unknown's avatar
unknown committed
2519
	innodb_srv_conc_exit_innodb(prebuilt->trx);
unknown's avatar
Merge  
unknown committed
2520

unknown's avatar
unknown committed
2521
	error = convert_error_code_to_mysql(error, user_thd);
2522

2523
	/* Tell the InnoDB server that there might be work for
2524 2525
	utility threads: */

2526
	innobase_active_small();
2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542

	DBUG_RETURN(error);
}

/**********************************************************************
Initializes a handle to use an index. */

int
ha_innobase::index_init(
/*====================*/
			/* out: 0 or error number */
	uint 	keynr)	/* in: key (index) number */
{
	int	result;

	DBUG_ENTER("index_init");

	/* All the work is done by change_active_index() */

	result = change_active_index(keynr);

	DBUG_RETURN(result);
}

/**********************************************************************
2549
Currently does nothing. */
2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562

int
ha_innobase::index_end(void)
/*========================*/
{
	int 	error	= 0;
  	DBUG_ENTER("index_end");

  	DBUG_RETURN(error);
}

/*************************************************************************
Converts a search mode flag understood by MySQL to a flag understood
2563
by InnoDB. */
2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577
inline
ulint
convert_search_mode_to_innobase(
/*============================*/
	enum ha_rkey_function	find_flag)
{
	switch (find_flag) {
  		case HA_READ_KEY_EXACT:		return(PAGE_CUR_GE);
  			/* the above does not require the index to be UNIQUE */
  		case HA_READ_KEY_OR_NEXT:	return(PAGE_CUR_GE);
		case HA_READ_KEY_OR_PREV:	return(PAGE_CUR_LE);
		case HA_READ_AFTER_KEY:		return(PAGE_CUR_G);
		case HA_READ_BEFORE_KEY:	return(PAGE_CUR_L);
		case HA_READ_PREFIX:		return(PAGE_CUR_GE);
unknown's avatar
unknown committed
2578
	        case HA_READ_PREFIX_LAST:       return(PAGE_CUR_LE);
unknown's avatar
unknown committed
2579
		  /* In MySQL-4.0 HA_READ_PREFIX and HA_READ_PREFIX_LAST always
unknown's avatar
unknown committed
2580
		  pass a complete-field prefix of a key value as the search
unknown's avatar
unknown committed
2581 2582 2583 2584 2585
		  tuple. I.e., it is not allowed that the last field would
		  just contain n first bytes of the full field value.
		  MySQL uses a 'padding' trick to convert LIKE 'abc%'
		  type queries so that it can use as a search tuple
		  a complete-field-prefix of a key value. Thus, the InnoDB
unknown's avatar
unknown committed
2586 2587 2588 2589 2590 2591 2592
		  search mode PAGE_CUR_LE_OR_EXTENDS is never used.
		  TODO: when/if MySQL starts to use also partial-field
		  prefixes, we have to deal with stripping of spaces
		  and comparison of non-latin1 char type fields in
		  innobase_mysql_cmp() to get PAGE_CUR_LE_OR_EXTENDS to
		  work correctly. */

2593 2594 2595 2596 2597
		default:			assert(0);
	}

	return(0);
}
2598

unknown's avatar
unknown committed
2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647
/*
   BACKGROUND INFO: HOW A SELECT SQL QUERY IS EXECUTED
   ---------------------------------------------------
The following does not cover all the details, but explains how we determine
the start of a new SQL statement, and what is associated with it.

For each table in the database the MySQL interpreter may have several
table handle instances in use, also in a single SQL query. For each table
handle instance there is an InnoDB  'prebuilt' struct which contains most
of the InnoDB data associated with this table handle instance.

  A) if the user has not explicitly set any MySQL table level locks:

  1) MySQL calls ::external_lock to set an 'intention' table level lock on
the table of the handle instance. There we set
prebuilt->sql_stat_start = TRUE. The flag sql_stat_start should be set
true if we are taking this table handle instance to use in a new SQL
statement issued by the user. We also increment trx->n_mysql_tables_in_use.

  2) If prebuilt->sql_stat_start == TRUE we 'pre-compile' the MySQL search
instructions to prebuilt->template of the table handle instance in
::index_read. The template is used to save CPU time in large joins.

  3) In row_search_for_mysql, if prebuilt->sql_stat_start is true, we
allocate a new consistent read view for the trx if it does not yet have one,
or in the case of a locking read, set an InnoDB 'intention' table level
lock on the table.

  4) We do the SELECT. MySQL may repeatedly call ::index_read for the
same table handle instance, if it is a join.

  5) When the SELECT ends, MySQL removes its intention table level locks
in ::external_lock. When trx->n_mysql_tables_in_use drops to zero,
 (a) we execute a COMMIT there if the autocommit is on,
 (b) we also release possible 'SQL statement level resources' InnoDB may
have for this SQL statement. The MySQL interpreter does NOT execute
autocommit for pure read transactions, though it should. That is why the
table handler in that case has to execute the COMMIT in ::external_lock.

  B) If the user has explicitly set MySQL table level locks, then MySQL
does NOT call ::external_lock at the start of the statement. To determine
when we are at the start of a new SQL statement we at the start of
::index_read also compare the query id to the latest query id where the
table handle instance was used. If it has changed, we know we are at the
start of a new SQL statement. Since the query id can theoretically
wrap around, we use this test only as a secondary way of determining the
start of a new SQL statement. */


2648 2649 2650 2651 2652 2653 2654 2655 2656
/**************************************************************************
Positions an index cursor to the index specified in the handle. Fetches the
row if any. */

int
ha_innobase::index_read(
/*====================*/
					/* out: 0, HA_ERR_KEY_NOT_FOUND,
					or error number */
2657
	mysql_byte*		buf,	/* in/out: buffer for the returned
2658
					row */
2659
	const mysql_byte* 	key_ptr,/* in: key value; if this is NULL
2660
					we position the cursor at the
unknown's avatar
unknown committed
2661 2662 2663
					start or end of index; this can
					also contain an InnoDB row id, in
					which case key_len is the InnoDB
unknown's avatar
unknown committed
2664 2665 2666 2667
					row id length; the key value can
					also be a prefix of a full key value,
					and the last column can be a prefix
					of a full column */
2668
	uint			key_len,/* in: key value length */
2669 2670 2671 2672 2673 2674 2675 2676 2677 2678
	enum ha_rkey_function find_flag)/* in: search flags from my_base.h */
{
	row_prebuilt_t* prebuilt	= (row_prebuilt_t*) innobase_prebuilt;
	ulint		mode;
	dict_index_t*	index;
	ulint		match_mode 	= 0;
	int 		error;
	ulint		ret;

  	DBUG_ENTER("index_read");
unknown's avatar
unknown committed
2679 2680 2681 2682

	ut_a(prebuilt->trx ==
		(trx_t*) current_thd->transaction.all.innobase_tid);

2683
  	statistic_increment(ha_read_key_count, &LOCK_status);
2684

unknown's avatar
unknown committed
2685 2686 2687
	if (last_query_id != user_thd->query_id) {
	        prebuilt->sql_stat_start = TRUE;
                last_query_id = user_thd->query_id;
unknown's avatar
unknown committed
2688 2689

		innobase_release_stat_resources(prebuilt->trx);
unknown's avatar
unknown committed
2690 2691
	}

2692
	index = prebuilt->index;
2693

unknown's avatar
unknown committed
2694 2695
	/* Note that if the index for which the search template is built is not
        necessarily prebuilt->index, but can also be the clustered index */
2696

2697 2698 2699 2700
	if (prebuilt->sql_stat_start) {
		build_template(prebuilt, user_thd, table,
							ROW_MYSQL_REC_FIELDS);
	}
2701 2702

	if (key_ptr) {
unknown's avatar
unknown committed
2703 2704 2705
	        /* Convert the search key value to InnoDB format into
		prebuilt->search_tuple */

2706
		row_sel_convert_mysql_key_to_innobase(prebuilt->search_tuple,
unknown's avatar
unknown committed
2707 2708 2709 2710 2711
					(byte*) key_val_buff,
					(ulint)upd_and_key_val_buff_len,
					index,
					(byte*) key_ptr,
					(ulint) key_len);
2712 2713 2714 2715 2716 2717
	} else {
		/* We position the cursor to the last or the first entry
		in the index */

 		dtuple_set_n_fields(prebuilt->search_tuple, 0);
	}
2718

2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732
	mode = convert_search_mode_to_innobase(find_flag);

	match_mode = 0;

	if (find_flag == HA_READ_KEY_EXACT) {
		match_mode = ROW_SEL_EXACT;

	} else if (find_flag == HA_READ_PREFIX
				|| find_flag == HA_READ_PREFIX_LAST) {
		match_mode = ROW_SEL_EXACT_PREFIX;
	}

	last_match_mode = match_mode;

unknown's avatar
unknown committed
2733
	innodb_srv_conc_enter_innodb(prebuilt->trx);
unknown's avatar
Merge  
unknown committed
2734

unknown's avatar
unknown committed
2735
	ret = row_search_for_mysql((byte*) buf, mode, prebuilt, match_mode, 0);
2736

unknown's avatar
unknown committed
2737
	innodb_srv_conc_exit_innodb(prebuilt->trx);
unknown's avatar
Merge  
unknown committed
2738

2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750
	if (ret == DB_SUCCESS) {
		error = 0;
		table->status = 0;

	} else if (ret == DB_RECORD_NOT_FOUND) {
		error = HA_ERR_KEY_NOT_FOUND;
		table->status = STATUS_NOT_FOUND;

	} else if (ret == DB_END_OF_INDEX) {
		error = HA_ERR_KEY_NOT_FOUND;
		table->status = STATUS_NOT_FOUND;
	} else {
unknown's avatar
unknown committed
2751
		error = convert_error_code_to_mysql(ret, user_thd);
2752 2753
		table->status = STATUS_NOT_FOUND;
	}
2754

2755 2756 2757
	DBUG_RETURN(error);
}

unknown's avatar
unknown committed
2758 2759 2760
/***********************************************************************
The following functions works like index_read, but it find the last
row with the current key value or prefix. */
2761 2762

int
unknown's avatar
unknown committed
2763 2764 2765 2766 2767 2768 2769 2770 2771
ha_innobase::index_read_last(
/*=========================*/
			           /* out: 0, HA_ERR_KEY_NOT_FOUND, or an
				   error code */
        mysql_byte*       buf,     /* out: fetched row */
        const mysql_byte* key_ptr, /* in: key value, or a prefix of a full
				   key value */
	uint              key_len) /* in: length of the key val or prefix
				   in bytes */
2772
{
unknown's avatar
unknown committed
2773
        return(index_read(buf, key_ptr, key_len, HA_READ_PREFIX_LAST));
2774 2775
}

2776 2777 2778 2779 2780 2781
/************************************************************************
Changes the active index of a handle. */

int
ha_innobase::change_active_index(
/*=============================*/
2782 2783 2784
			/* out: 0 or error code */
	uint 	keynr)	/* in: use this index; MAX_KEY means always clustered
			index, even if it was internally generated by
2785
			InnoDB */
2786
{
unknown's avatar
unknown committed
2787 2788 2789 2790
	row_prebuilt_t* prebuilt	= (row_prebuilt_t*) innobase_prebuilt;
	KEY*		key=0;
	statistic_increment(ha_read_key_count, &LOCK_status);
	DBUG_ENTER("change_active_index");
2791

unknown's avatar
unknown committed
2792
	ut_a(prebuilt->trx ==
unknown's avatar
unknown committed
2793 2794
	     (trx_t*) current_thd->transaction.all.innobase_tid);

unknown's avatar
unknown committed
2795
	active_index = keynr;
2796

unknown's avatar
unknown committed
2797 2798
	if (keynr != MAX_KEY && table->keys > 0) {
		key = table->key_info + active_index;
2799

unknown's avatar
unknown committed
2800
		prebuilt->index = dict_table_get_index_noninline(
unknown's avatar
unknown committed
2801 2802
						     prebuilt->table,
						     key->name);
unknown's avatar
unknown committed
2803 2804
        } else {
		prebuilt->index = dict_table_get_first_index_noninline(
unknown's avatar
unknown committed
2805
							   prebuilt->table);
unknown's avatar
unknown committed
2806
	}
2807

unknown's avatar
unknown committed
2808 2809 2810 2811 2812 2813
	if (!prebuilt->index) {
	       sql_print_error(
"Innodb could not find key n:o %u with name %s from dict cache for table %s",
	      keynr, key ? key->name : "NULL", prebuilt->table->name);
	      DBUG_RETURN(1);
	}
2814

unknown's avatar
unknown committed
2815
	assert(prebuilt->search_tuple != 0);
unknown's avatar
Merge  
unknown committed
2816

unknown's avatar
unknown committed
2817
	dtuple_set_n_fields(prebuilt->search_tuple, prebuilt->index->n_fields);
2818

unknown's avatar
unknown committed
2819
	dict_index_copy_types(prebuilt->search_tuple, prebuilt->index,
2820
			prebuilt->index->n_fields);
2821

unknown's avatar
unknown committed
2822 2823 2824
	/* Maybe MySQL changes the active index for a handle also
	during some queries, we do not know: then it is safest to build
	the template such that all columns will be fetched. */
2825

unknown's avatar
unknown committed
2826
	build_template(prebuilt, NULL, table, ROW_MYSQL_WHOLE_ROW);
2827

unknown's avatar
unknown committed
2828
	DBUG_RETURN(0);
2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839
}

/**************************************************************************
Positions an index cursor to the index specified in keynr. Fetches the
row if any. */
/* ??? This is only used to read whole keys ??? */

int
ha_innobase::index_read_idx(
/*========================*/
					/* out: error number or 0 */
2840
	mysql_byte*	buf,		/* in/out: buffer for the returned
2841 2842
					row */
	uint 		keynr,		/* in: use this index */
2843
	const mysql_byte* key,		/* in: key value; if this is NULL
2844 2845 2846 2847 2848
					we position the cursor at the
					start or end of index */
	uint		key_len,	/* in: key value length */
	enum ha_rkey_function find_flag)/* in: search flags from my_base.h */
{
unknown's avatar
Merge  
unknown committed
2849 2850 2851 2852
	if (change_active_index(keynr)) {

		return(1);
	}
2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865

	return(index_read(buf, key, key_len, find_flag));
}

/***************************************************************************
Reads the next or previous row from a cursor, which must have previously been
positioned using index_read. */

int
ha_innobase::general_fetch(
/*=======================*/
				/* out: 0, HA_ERR_END_OF_FILE, or error
				number */
2866
	mysql_byte* 	buf,	/* in/out: buffer for next row in MySQL
2867 2868 2869 2870 2871 2872 2873 2874
				format */
	uint 	direction,	/* in: ROW_SEL_NEXT or ROW_SEL_PREV */
	uint	match_mode)	/* in: 0, ROW_SEL_EXACT, or
				ROW_SEL_EXACT_PREFIX */
{
	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;
	ulint		ret;
	int		error	= 0;
2875

2876
	DBUG_ENTER("general_fetch");
2877

unknown's avatar
unknown committed
2878
	ut_a(prebuilt->trx ==
unknown's avatar
unknown committed
2879
	     (trx_t*) current_thd->transaction.all.innobase_tid);
unknown's avatar
unknown committed
2880

unknown's avatar
unknown committed
2881
	innodb_srv_conc_enter_innodb(prebuilt->trx);
unknown's avatar
unknown committed
2882

unknown's avatar
Merge  
unknown committed
2883 2884
	ret = row_search_for_mysql((byte*)buf, 0, prebuilt, match_mode,
								direction);
unknown's avatar
unknown committed
2885
	innodb_srv_conc_exit_innodb(prebuilt->trx);
2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898

	if (ret == DB_SUCCESS) {
		error = 0;
		table->status = 0;

	} else if (ret == DB_RECORD_NOT_FOUND) {
		error = HA_ERR_END_OF_FILE;
		table->status = STATUS_NOT_FOUND;

	} else if (ret == DB_END_OF_INDEX) {
		error = HA_ERR_END_OF_FILE;
		table->status = STATUS_NOT_FOUND;
	} else {
unknown's avatar
unknown committed
2899
		error = convert_error_code_to_mysql(ret, user_thd);
2900 2901
		table->status = STATUS_NOT_FOUND;
	}
2902

2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914
	DBUG_RETURN(error);
}

/***************************************************************************
Reads the next row from a cursor, which must have previously been
positioned using index_read. */

int
ha_innobase::index_next(
/*====================*/
				/* out: 0, HA_ERR_END_OF_FILE, or error
				number */
2915
	mysql_byte* 	buf)	/* in/out: buffer for next row in MySQL
2916 2917
				format */
{
2918 2919
  	statistic_increment(ha_read_next_count, &LOCK_status);

2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930
	return(general_fetch(buf, ROW_SEL_NEXT, 0));
}

/***********************************************************************
Reads the next row matching to the key value given as the parameter. */

int
ha_innobase::index_next_same(
/*=========================*/
				/* out: 0, HA_ERR_END_OF_FILE, or error
				number */
2931 2932
	mysql_byte* 	buf,	/* in/out: buffer for the row */
	const mysql_byte* key,	/* in: key value */
2933 2934
	uint 		keylen)	/* in: key value length */
{
2935
  	statistic_increment(ha_read_next_count, &LOCK_status);
2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948

	return(general_fetch(buf, ROW_SEL_NEXT, last_match_mode));
}

/***************************************************************************
Reads the previous row from a cursor, which must have previously been
positioned using index_read. */

int
ha_innobase::index_prev(
/*====================*/
				/* out: 0, HA_ERR_END_OF_FILE, or error
				number */
2949
	mysql_byte* 	buf)	/* in/out: buffer for previous row in MySQL
2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961
				format */
{
	return(general_fetch(buf, ROW_SEL_PREV, 0));
}

/************************************************************************
Positions a cursor on the first record in an index and reads the
corresponding row to buf. */

int
ha_innobase::index_first(
/*=====================*/
2962
				/* out: 0, HA_ERR_END_OF_FILE,
2963 2964
				or error code */
	mysql_byte*	buf)	/* in/out: buffer for the row */
2965 2966 2967 2968 2969 2970 2971 2972
{
	int	error;

  	DBUG_ENTER("index_first");
  	statistic_increment(ha_read_first_count, &LOCK_status);

  	error = index_read(buf, NULL, 0, HA_READ_AFTER_KEY);

2973 2974 2975 2976 2977 2978
        /* MySQL does not seem to allow this to return HA_ERR_KEY_NOT_FOUND */

  	if (error == HA_ERR_KEY_NOT_FOUND) {
  		error = HA_ERR_END_OF_FILE;
  	}

2979 2980 2981 2982 2983 2984 2985 2986 2987 2988
  	DBUG_RETURN(error);
}

/************************************************************************
Positions a cursor on the last record in an index and reads the
corresponding row to buf. */

int
ha_innobase::index_last(
/*====================*/
2989 2990
				/* out: 0, HA_ERR_END_OF_FILE, or error code */
	mysql_byte*	buf)	/* in/out: buffer for the row */
2991 2992 2993 2994
{
	int	error;

  	DBUG_ENTER("index_first");
2995
  	statistic_increment(ha_read_last_count, &LOCK_status);
2996 2997 2998

  	error = index_read(buf, NULL, 0, HA_READ_BEFORE_KEY);

2999
        /* MySQL does not seem to allow this to return HA_ERR_KEY_NOT_FOUND */
3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014

  	if (error == HA_ERR_KEY_NOT_FOUND) {
  		error = HA_ERR_END_OF_FILE;
  	}

  	DBUG_RETURN(error);
}

/********************************************************************
Initialize a table scan. */

int
ha_innobase::rnd_init(
/*==================*/
			/* out: 0 or error number */
3015
	bool	scan)	/* in: ???????? */
3016
{
unknown's avatar
Merge  
unknown committed
3017
	int	err;
unknown's avatar
unknown committed
3018

3019
	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;
3020

unknown's avatar
unknown committed
3021 3022 3023 3024 3025
	/* Store the active index value so that we can restore the original
	value after a scan */

	active_index_before_scan = active_index;

3026
	if (prebuilt->clust_index_was_generated) {
unknown's avatar
Merge  
unknown committed
3027
		err = change_active_index(MAX_KEY);
3028
	} else {
unknown's avatar
Merge  
unknown committed
3029
		err = change_active_index(primary_key);
3030
	}
3031

3032
  	start_of_scan = 1;
3033

unknown's avatar
Merge  
unknown committed
3034
 	return(err);
3035 3036 3037
}

/*********************************************************************
unknown's avatar
unknown committed
3038
Ends a table scan. */
3039 3040 3041 3042 3043 3044

int
ha_innobase::rnd_end(void)
/*======================*/
				/* out: 0 or error number */
{
unknown's avatar
unknown committed
3045
	/* Restore the old active_index back; MySQL may assume that a table
unknown's avatar
unknown committed
3046 3047 3048 3049 3050
	scan does not change active_index. We only restore the value if
	MySQL has called rnd_init before: sometimes MySQL seems to call
	rnd_end WITHOUT calling rnd_init. */

	if (active_index_before_scan != (uint)-1) {
unknown's avatar
unknown committed
3051

unknown's avatar
unknown committed
3052 3053 3054 3055
		change_active_index(active_index_before_scan);

		active_index_before_scan = (uint)-1;
	}
unknown's avatar
unknown committed
3056

3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067
  	return(index_end());
}

/*********************************************************************
Reads the next row in a table scan (also used to read the FIRST row
in a table scan). */

int
ha_innobase::rnd_next(
/*==================*/
			/* out: 0, HA_ERR_END_OF_FILE, or error number */
3068
	mysql_byte* buf)/* in/out: returns the row in this buffer,
3069 3070
			in MySQL format */
{
3071
	int	error;
3072 3073 3074 3075

  	DBUG_ENTER("rnd_next");
  	statistic_increment(ha_read_rnd_next_count, &LOCK_status);

3076
  	if (start_of_scan) {
3077 3078 3079 3080
		error = index_first(buf);
		if (error == HA_ERR_KEY_NOT_FOUND) {
			error = HA_ERR_END_OF_FILE;
		}
3081
		start_of_scan = 0;
3082
	} else {
3083
		error = general_fetch(buf, ROW_SEL_NEXT, 0);
3084
	}
3085

3086 3087 3088 3089
  	DBUG_RETURN(error);
}

/**************************************************************************
unknown's avatar
unknown committed
3090
Fetches a row from the table based on a row reference. */
3091

3092 3093 3094
int
ha_innobase::rnd_pos(
/*=================*/
3095 3096 3097
				/* out: 0, HA_ERR_KEY_NOT_FOUND,
				or error code */
	mysql_byte* 	buf,	/* in/out: buffer for the row */
unknown's avatar
unknown committed
3098 3099 3100 3101 3102
	mysql_byte*	pos)	/* in: primary key value of the row in the
				MySQL format, or the row id if the clustered
				index was internally generated by InnoDB;
				the length of data in pos has to be
				ref_length */
3103
{
3104 3105 3106
	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;
	int		error;
	uint		keynr	= active_index;
3107
	DBUG_ENTER("rnd_pos");
unknown's avatar
unknown committed
3108
	DBUG_DUMP("key", (char*) pos, ref_length);
unknown's avatar
unknown committed
3109

3110
	statistic_increment(ha_read_rnd_count, &LOCK_status);
3111

unknown's avatar
unknown committed
3112 3113 3114
	ut_a(prebuilt->trx ==
		(trx_t*) current_thd->transaction.all.innobase_tid);

3115 3116 3117 3118
	if (prebuilt->clust_index_was_generated) {
		/* No primary key was defined for the table and we
		generated the clustered index from the row id: the
		row reference is the row id, not any key value
unknown's avatar
unknown committed
3119
		that MySQL knows of */
3120

unknown's avatar
Merge  
unknown committed
3121
		error = change_active_index(MAX_KEY);
3122
	} else {
unknown's avatar
Merge  
unknown committed
3123
		error = change_active_index(primary_key);
3124
	}
3125

unknown's avatar
Merge  
unknown committed
3126
	if (error) {
unknown's avatar
unknown committed
3127
	        DBUG_PRINT("error",("Got error: %ld",error));
unknown's avatar
Merge  
unknown committed
3128 3129
		DBUG_RETURN(error);
	}
unknown's avatar
unknown committed
3130

unknown's avatar
unknown committed
3131 3132 3133 3134
	/* Note that we assume the length of the row reference is fixed
        for the table, and it is == ref_length */

	error = index_read(buf, pos, ref_length, HA_READ_KEY_EXACT);
unknown's avatar
unknown committed
3135 3136 3137 3138
	if (error)
	{
	  DBUG_PRINT("error",("Got error: %ld",error));
	}
3139
	change_active_index(keynr);
3140

3141 3142 3143 3144
  	DBUG_RETURN(error);
}

/*************************************************************************
3145
Stores a reference to the current row to 'ref' field of the handle. Note
unknown's avatar
unknown committed
3146 3147
that in the case where we have generated the clustered index for the
table, the function parameter is illogical: we MUST ASSUME that 'record'
unknown's avatar
unknown committed
3148
is the current 'position' of the handle, because if row ref is actually
3149
the row id internally generated in InnoDB, then 'record' does not contain
3150 3151
it. We just guess that the row id must be for the record where the handle
was positioned the last time. */
3152 3153 3154 3155

void
ha_innobase::position(
/*==================*/
3156
	const mysql_byte*	record)	/* in: row in MySQL format */
3157
{
3158 3159
	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;
	uint		len;
3160

unknown's avatar
unknown committed
3161 3162 3163
	ut_a(prebuilt->trx ==
		(trx_t*) current_thd->transaction.all.innobase_tid);

3164 3165 3166 3167
	if (prebuilt->clust_index_was_generated) {
		/* No primary key was defined for the table and we
		generated the clustered index from row id: the
		row reference will be the row id, not any key value
unknown's avatar
unknown committed
3168
		that MySQL knows of */
3169 3170 3171 3172 3173

		len = DATA_ROW_ID_LEN;

		memcpy(ref, prebuilt->row_id, len);
	} else {
3174 3175
		len = store_key_val_for_row(primary_key, (char*)ref,
							 ref_length, record);
3176
	}
3177

unknown's avatar
unknown committed
3178 3179 3180
	/* Since we do not store len to the buffer 'ref', we must assume
	that len is always fixed for this table. The following assertion
	checks this. */
unknown's avatar
unknown committed
3181
  
unknown's avatar
unknown committed
3182 3183 3184 3185 3186
	if (len != ref_length) {
	        fprintf(stderr,
	 "InnoDB: Error: stored ref len is %lu, but table ref len is %lu\n",
		  (ulint)len, (ulint)ref_length);
	}
3187 3188 3189
}

/*********************************************************************
3190
Creates a table definition to an InnoDB database. */
3191 3192 3193 3194
static
int
create_table_def(
/*=============*/
3195
	trx_t*		trx,		/* in: InnoDB transaction handle */
3196 3197 3198 3199 3200 3201 3202 3203 3204
	TABLE*		form,		/* in: information on table
					columns and indexes */
	const char*	table_name)	/* in: table name */
{
	Field*		field;
	dict_table_t*	table;
	ulint		n_cols;
  	int 		error;
  	ulint		col_type;
3205 3206
  	ulint		nulls_allowed;
	ulint		unsigned_type;
unknown's avatar
unknown committed
3207 3208
	ulint		binary_type;
	ulint		nonlatin1_type;
3209
  	ulint		i;
3210

3211 3212 3213 3214 3215 3216 3217 3218 3219
  	DBUG_ENTER("create_table_def");
  	DBUG_PRINT("enter", ("table_name: %s", table_name));

	n_cols = form->fields;

	/* The '0' below specifies that everything is currently
	created in tablespace 0 */

	table = dict_mem_table_create((char*) table_name, 0, n_cols);
3220

3221 3222 3223
	for (i = 0; i < n_cols; i++) {
		field = form->field[i];

3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235
		col_type = get_innobase_type_from_mysql_type(field);
		if (field->null_ptr) {
			nulls_allowed = 0;
		} else {
			nulls_allowed = DATA_NOT_NULL;
		}

		if (field->flags & UNSIGNED_FLAG) {
			unsigned_type = DATA_UNSIGNED;
		} else {
			unsigned_type = 0;
		}
3236

unknown's avatar
unknown committed
3237 3238
		if (col_type == DATA_BLOB
		    && strcmp(default_charset_info->name, "latin1") != 0) {
unknown's avatar
unknown committed
3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250
			nonlatin1_type = DATA_NONLATIN1;
		} else {
		        nonlatin1_type = 0;
		}

		if (field->flags & BINARY_FLAG) {
			binary_type = DATA_BINARY_TYPE;
		        nonlatin1_type = 0;
		} else {
			binary_type = 0;
		}

3251
		dict_mem_table_add_col(table, (char*) field->field_name,
3252
					col_type, (ulint)field->type()
unknown's avatar
unknown committed
3253 3254
					| nulls_allowed | unsigned_type
					| nonlatin1_type | binary_type,
3255 3256 3257 3258 3259
					field->pack_length(), 0);
	}

	error = row_create_table_for_mysql(table, trx);

unknown's avatar
unknown committed
3260
	error = convert_error_code_to_mysql(error, NULL);
3261 3262 3263 3264 3265

	DBUG_RETURN(error);
}

/*********************************************************************
3266
Creates an index in an InnoDB database. */
3267 3268
static
int
3269 3270
create_index(
/*=========*/
3271
	trx_t*		trx,		/* in: InnoDB transaction handle */
3272 3273 3274 3275 3276
	TABLE*		form,		/* in: information on table
					columns and indexes */
	const char*	table_name,	/* in: table name */
	uint		key_num)	/* in: index number */
{
unknown's avatar
unknown committed
3277
	Field*		field;
3278
	dict_index_t*	index;
3279
  	int 		error;
3280 3281 3282 3283
	ulint		n_fields;
	KEY*		key;
	KEY_PART_INFO*	key_part;
	ulint		ind_type;
unknown's avatar
unknown committed
3284 3285
	ulint		col_type;
	ulint		prefix_len;
3286
  	ulint		i;
unknown's avatar
unknown committed
3287
  	ulint		j;
3288

3289
  	DBUG_ENTER("create_index");
3290

3291 3292 3293
	key = form->key_info + key_num;

    	n_fields = key->key_parts;
3294

3295 3296
    	ind_type = 0;

unknown's avatar
unknown committed
3297 3298
    	if (key_num == form->primary_key)
	{
3299 3300
		ind_type = ind_type | DICT_CLUSTERED;
	}
3301

3302 3303 3304 3305
	if (key->flags & HA_NOSAME ) {
		ind_type = ind_type | DICT_UNIQUE;
	}

3306
	/* The '0' below specifies that everything in InnoDB is currently
3307 3308 3309 3310 3311 3312 3313
	created in tablespace 0 */

	index = dict_mem_index_create((char*) table_name, key->name, 0,
						ind_type, n_fields);
	for (i = 0; i < n_fields; i++) {
		key_part = key->key_part + i;

3314
		/* (The flag HA_PART_KEY_SEG denotes in MySQL a column prefix
unknown's avatar
unknown committed
3315 3316 3317 3318 3319 3320 3321 3322 3323 3324
		field in an index: we only store a specified number of first
		bytes of the column to the index field.) The flag does not
		seem to be properly set by MySQL. Let us fall back on testing
		the length of the key part versus the column. */
		
		field = NULL;
		for (j = 0; j < form->fields; j++) {

			field = form->field[j];

3325
			if (0 == ut_cmp_in_lower_case(
unknown's avatar
unknown committed
3326
					(char*)field->field_name,
3327
					(char*)key_part->field->field_name)) {
unknown's avatar
unknown committed
3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340
				/* Found the corresponding column */

				break;
			}
		}

		ut_a(j < form->fields);

		col_type = get_innobase_type_from_mysql_type(key_part->field);

		if (DATA_BLOB == col_type
		    || key_part->length < field->pack_length()) {

unknown's avatar
unknown committed
3341 3342 3343 3344 3345 3346 3347 3348
		        prefix_len = key_part->length;

			if (col_type == DATA_INT
			    || col_type == DATA_FLOAT
			    || col_type == DATA_DOUBLE
			    || col_type == DATA_DECIMAL) {
			        fprintf(stderr,
"InnoDB: error: MySQL is trying to create a column prefix index field\n"
unknown's avatar
unknown committed
3349 3350
"InnoDB: on an inappropriate data type. Table name %s, column name %s.\n",
				  table_name, key_part->field->field_name);
unknown's avatar
unknown committed
3351 3352 3353 3354 3355
			        
			        prefix_len = 0;
			}
		} else {
		        prefix_len = 0;
unknown's avatar
unknown committed
3356 3357 3358 3359 3360
		}

		if (prefix_len >= DICT_MAX_COL_PREFIX_LEN) {
			DBUG_RETURN(-1);
		}
unknown's avatar
unknown committed
3361

3362 3363
		/* We assume all fields should be sorted in ascending
		order, hence the '0': */
unknown's avatar
unknown committed
3364

3365
		dict_mem_index_add_field(index,
unknown's avatar
unknown committed
3366 3367
				(char*) key_part->field->field_name,
				0, prefix_len);
3368 3369 3370 3371
	}

	error = row_create_index_for_mysql(index, trx);

unknown's avatar
unknown committed
3372
	error = convert_error_code_to_mysql(error, NULL);
3373 3374 3375 3376 3377

	DBUG_RETURN(error);
}

/*********************************************************************
3378
Creates an index to an InnoDB table when the user has defined no
3379
primary index. */
3380 3381
static
int
3382 3383
create_clustered_index_when_no_primary(
/*===================================*/
3384
	trx_t*		trx,		/* in: InnoDB transaction handle */
3385 3386 3387
	const char*	table_name)	/* in: table name */
{
	dict_index_t*	index;
3388 3389
  	int 		error;

3390
	/* The first '0' below specifies that everything in InnoDB is
3391 3392
	currently created in file space 0 */

unknown's avatar
unknown committed
3393 3394 3395
	index = dict_mem_index_create((char*) table_name,
				      (char*) "GEN_CLUST_INDEX",
				      0, DICT_CLUSTERED, 0);
3396 3397
	error = row_create_index_for_mysql(index, trx);

unknown's avatar
unknown committed
3398
	error = convert_error_code_to_mysql(error, NULL);
3399

3400
	return(error);
3401 3402 3403
}

/*********************************************************************
3404
Creates a new table to an InnoDB database. */
3405 3406 3407 3408 3409 3410 3411 3412

int
ha_innobase::create(
/*================*/
					/* out: error number */
	const char*	name,		/* in: table name */
	TABLE*		form,		/* in: information on table
					columns and indexes */
3413 3414 3415
	HA_CREATE_INFO*	create_info)	/* in: more information of the
					created table, contains also the
					create statement string */
3416 3417 3418
{
	int		error;
	dict_table_t*	innobase_table;
unknown's avatar
unknown committed
3419
	trx_t*		parent_trx;
3420
	trx_t*		trx;
unknown's avatar
unknown committed
3421
	int		primary_key_no;
3422
	uint		i;
unknown's avatar
unknown committed
3423 3424
	char		name2[FN_REFLEN];
	char		norm_name[FN_REFLEN];
unknown's avatar
unknown committed
3425
	THD		*thd= current_thd;
3426

3427 3428
  	DBUG_ENTER("ha_innobase::create");

unknown's avatar
unknown committed
3429
	DBUG_ASSERT(thd != NULL);
unknown's avatar
unknown committed
3430

unknown's avatar
unknown committed
3431 3432 3433 3434
	if (form->fields > 1000) {
		/* The limit probably should be REC_MAX_N_FIELDS - 3 = 1020,
		but we play safe here */

unknown's avatar
unknown committed
3435
	     DBUG_RETURN(HA_ERR_TO_BIG_ROW);
unknown's avatar
unknown committed
3436 3437
	} 

unknown's avatar
unknown committed
3438 3439 3440 3441 3442 3443 3444 3445 3446 3447
	/* Get the transaction associated with the current thd, or create one
	if not yet created */
	
	parent_trx = check_trx_exists(current_thd);

	/* In case MySQL calls this in the middle of a SELECT query, release
	possible adaptive hash latch to avoid deadlocks of threads */

	trx_search_latch_release_if_reserved(parent_trx);	
	
3448
	trx = trx_allocate_for_mysql();
unknown's avatar
unknown committed
3449 3450 3451
		
	trx->mysql_thd = thd;
	trx->mysql_query_str = &((*thd).query);
3452

unknown's avatar
unknown committed
3453
	if (thd->options & OPTION_NO_FOREIGN_KEY_CHECKS) {
unknown's avatar
unknown committed
3454 3455 3456
		trx->check_foreigns = FALSE;
	}

unknown's avatar
unknown committed
3457
	if (thd->options & OPTION_RELAXED_UNIQUE_CHECKS) {
unknown's avatar
unknown committed
3458 3459 3460
		trx->check_unique_secondary = FALSE;
	}

unknown's avatar
unknown committed
3461 3462 3463 3464 3465
	if (lower_case_table_names) {
		srv_lower_case_table_names = TRUE;
	} else {
		srv_lower_case_table_names = FALSE;
	}
unknown's avatar
unknown committed
3466

unknown's avatar
unknown committed
3467
	fn_format(name2, name, "", "",2);	// Remove the .frm extension
3468 3469

	normalize_table_name(norm_name, name2);
3470

unknown's avatar
unknown committed
3471
	/* Latch the InnoDB data dictionary exclusively so that no deadlocks
unknown's avatar
unknown committed
3472
	or lock waits can happen in it during a table create operation.
unknown's avatar
unknown committed
3473
	Drop table etc. do this latching in row0mysql.c. */
unknown's avatar
unknown committed
3474

unknown's avatar
unknown committed
3475
	row_mysql_lock_data_dictionary(trx);
unknown's avatar
unknown committed
3476 3477

	/* Create the table definition in InnoDB */
3478

unknown's avatar
unknown committed
3479 3480 3481
  	error = create_table_def(trx, form, norm_name);
  	
  	if (error) {
unknown's avatar
unknown committed
3482
		innobase_commit_low(trx);
3483

unknown's avatar
unknown committed
3484
		row_mysql_unlock_data_dictionary(trx);
3485 3486 3487 3488 3489 3490

  		trx_free_for_mysql(trx);

 		DBUG_RETURN(error);
 	}

3491 3492
	/* Look for a primary key */

unknown's avatar
unknown committed
3493 3494 3495
	primary_key_no= (table->primary_key != MAX_KEY ?
			 (int) table->primary_key : 
			 -1);
3496

3497 3498 3499
	/* Our function row_get_mysql_key_number_for_index assumes
	the primary key is always number 0, if it exists */

unknown's avatar
unknown committed
3500
	DBUG_ASSERT(primary_key_no == -1 || primary_key_no == 0);
3501

3502 3503
	/* Create the keys */

3504 3505 3506
	if (form->keys == 0 || primary_key_no == -1) {
		/* Create an index which is used as the clustered index;
		order the rows by their row id which is internally generated
3507
		by InnoDB */
3508

3509
		error = create_clustered_index_when_no_primary(trx,
3510
							norm_name);
3511
  		if (error) {
unknown's avatar
unknown committed
3512 3513
			innobase_commit_low(trx);

unknown's avatar
unknown committed
3514
			row_mysql_unlock_data_dictionary(trx);
3515

3516 3517 3518 3519
			trx_free_for_mysql(trx);

			DBUG_RETURN(error);
      		}
3520 3521 3522
	}

	if (primary_key_no != -1) {
3523
		/* In InnoDB the clustered index must always be created
3524
		first */
unknown's avatar
unknown committed
3525 3526
	    	if ((error = create_index(trx, form, norm_name,
					  (uint) primary_key_no))) {
unknown's avatar
unknown committed
3527 3528
			innobase_commit_low(trx);

unknown's avatar
unknown committed
3529
			row_mysql_unlock_data_dictionary(trx);
3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540

  			trx_free_for_mysql(trx);

			DBUG_RETURN(error);
      		}
      	}

	for (i = 0; i < form->keys; i++) {

		if (i != (uint) primary_key_no) {

unknown's avatar
unknown committed
3541
    			if ((error = create_index(trx, form, norm_name, i))) {
3542

unknown's avatar
unknown committed
3543
			  	innobase_commit_low(trx);
3544

unknown's avatar
unknown committed
3545
				row_mysql_unlock_data_dictionary(trx);
3546 3547 3548 3549 3550

  				trx_free_for_mysql(trx);

				DBUG_RETURN(error);
      			}
3551
      		}
3552
  	}
3553

3554 3555 3556 3557
	if (current_thd->query != NULL) {
  	
		error = row_table_add_foreign_constraints(trx,
					current_thd->query, norm_name);
3558

3559
		error = convert_error_code_to_mysql(error, NULL);
3560

3561 3562
		if (error) {
			innobase_commit_low(trx);
unknown's avatar
unknown committed
3563

3564
			row_mysql_unlock_data_dictionary(trx);
3565

3566
  			trx_free_for_mysql(trx);
3567

3568 3569
			DBUG_RETURN(error);
		}
3570 3571
	}

unknown's avatar
unknown committed
3572 3573
  	innobase_commit_low(trx);

unknown's avatar
unknown committed
3574
	row_mysql_unlock_data_dictionary(trx);
3575

unknown's avatar
Merge  
unknown committed
3576 3577 3578
	/* Flush the log to reduce probability that the .frm files and
	the InnoDB data dictionary get out-of-sync if the user runs
	with innodb_flush_log_at_trx_commit = 0 */
unknown's avatar
unknown committed
3579

unknown's avatar
unknown committed
3580
	log_buffer_flush_to_disk();
unknown's avatar
Merge  
unknown committed
3581

3582
	innobase_table = dict_table_get(norm_name, NULL);
3583

unknown's avatar
unknown committed
3584
	DBUG_ASSERT(innobase_table != 0);
3585

3586
	/* Tell the InnoDB server that there might be work for
3587 3588 3589 3590 3591 3592 3593 3594 3595 3596
	utility threads: */

	srv_active_wake_master_thread();

  	trx_free_for_mysql(trx);

	DBUG_RETURN(0);
}

/*********************************************************************
3597
Drops a table from an InnoDB database. Before calling this function,
unknown's avatar
unknown committed
3598 3599
MySQL calls innobase_commit to commit the transaction of the current user.
Then the current user cannot have locks set on the table. Drop table
3600 3601
operation inside InnoDB will remove all locks any user has on the table
inside InnoDB. */
3602 3603 3604 3605

int
ha_innobase::delete_table(
/*======================*/
unknown's avatar
unknown committed
3606 3607
				/* out: error number */
	const char*	name)	/* in: table name */
3608 3609 3610
{
	ulint	name_len;
	int	error;
unknown's avatar
unknown committed
3611
	trx_t*	parent_trx;
3612
	trx_t*	trx;
unknown's avatar
unknown committed
3613
	THD	*thd= current_thd;
3614
	char	norm_name[1000];
3615

3616 3617
  	DBUG_ENTER("ha_innobase::delete_table");

unknown's avatar
unknown committed
3618 3619 3620 3621 3622 3623 3624 3625 3626 3627
	/* Get the transaction associated with the current thd, or create one
	if not yet created */
	
	parent_trx = check_trx_exists(current_thd);

	/* In case MySQL calls this in the middle of a SELECT query, release
	possible adaptive hash latch to avoid deadlocks of threads */

	trx_search_latch_release_if_reserved(parent_trx);	

unknown's avatar
unknown committed
3628 3629 3630 3631 3632 3633
	if (lower_case_table_names) {
		srv_lower_case_table_names = TRUE;
	} else {
		srv_lower_case_table_names = FALSE;
	}

3634 3635
	trx = trx_allocate_for_mysql();

unknown's avatar
unknown committed
3636 3637 3638
	trx->mysql_thd = current_thd;
	trx->mysql_query_str = &((*current_thd).query);

unknown's avatar
unknown committed
3639 3640 3641 3642 3643 3644 3645 3646
	if (thd->options & OPTION_NO_FOREIGN_KEY_CHECKS) {
		trx->check_foreigns = FALSE;
	}

	if (thd->options & OPTION_RELAXED_UNIQUE_CHECKS) {
		trx->check_unique_secondary = FALSE;
	}

3647 3648 3649
	name_len = strlen(name);

	assert(name_len < 1000);
3650

3651 3652
	/* Strangely, MySQL passes the table name without the '.frm'
	extension, in contrast to ::create */
3653

3654 3655
	normalize_table_name(norm_name, name);

3656
  	/* Drop the table in InnoDB */
3657

3658 3659
	error = row_drop_table_for_mysql(norm_name, trx,
		thd->lex.sql_command == SQLCOM_DROP_DB);
3660

unknown's avatar
Merge  
unknown committed
3661 3662 3663
	/* Flush the log to reduce probability that the .frm files and
	the InnoDB data dictionary get out-of-sync if the user runs
	with innodb_flush_log_at_trx_commit = 0 */
unknown's avatar
unknown committed
3664

unknown's avatar
unknown committed
3665
	log_buffer_flush_to_disk();
unknown's avatar
Merge  
unknown committed
3666

3667
	/* Tell the InnoDB server that there might be work for
3668 3669 3670 3671
	utility threads: */

	srv_active_wake_master_thread();

unknown's avatar
unknown committed
3672
  	innobase_commit_low(trx);
unknown's avatar
unknown committed
3673

3674 3675
  	trx_free_for_mysql(trx);

unknown's avatar
unknown committed
3676
	error = convert_error_code_to_mysql(error, NULL);
3677 3678 3679 3680

	DBUG_RETURN(error);
}

3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693
/*********************************************************************
Removes all tables in the named database inside InnoDB. */

int
innobase_drop_database(
/*===================*/
			/* out: error number */
	char*	path)	/* in: database path; inside InnoDB the name
			of the last directory in the path is used as
			the database name: for example, in 'mysql/data/test'
			the database name is 'test' */
{
	ulint	len		= 0;
unknown's avatar
unknown committed
3694
	trx_t*	parent_trx;
3695 3696 3697
	trx_t*	trx;
	char*	ptr;
	int	error;
3698
	char*	namebuf;
unknown's avatar
unknown committed
3699

unknown's avatar
unknown committed
3700 3701 3702 3703 3704 3705 3706 3707 3708 3709
	/* Get the transaction associated with the current thd, or create one
	if not yet created */
	
	parent_trx = check_trx_exists(current_thd);

	/* In case MySQL calls this in the middle of a SELECT query, release
	possible adaptive hash latch to avoid deadlocks of threads */

	trx_search_latch_release_if_reserved(parent_trx);	

3710
	ptr = strend(path) - 2;
unknown's avatar
unknown committed
3711

3712 3713 3714 3715 3716 3717
	while (ptr >= path && *ptr != '\\' && *ptr != '/') {
		ptr--;
		len++;
	}

	ptr++;
3718
	namebuf = my_malloc(len + 2, MYF(0));
3719 3720 3721 3722

	memcpy(namebuf, ptr, len);
	namebuf[len] = '/';
	namebuf[len + 1] = '\0';
unknown's avatar
unknown committed
3723
#ifdef __WIN__
unknown's avatar
unknown committed
3724
	casedn_str(namebuf);
unknown's avatar
unknown committed
3725
#endif
3726
	trx = trx_allocate_for_mysql();
unknown's avatar
unknown committed
3727 3728
	trx->mysql_thd = current_thd;
	trx->mysql_query_str = &((*current_thd).query);
3729

unknown's avatar
unknown committed
3730 3731 3732 3733
	if (current_thd->options & OPTION_NO_FOREIGN_KEY_CHECKS) {
		trx->check_foreigns = FALSE;
	}

3734
  	error = row_drop_database_for_mysql(namebuf, trx);
3735
	my_free(namebuf, MYF(0));
3736

unknown's avatar
Merge  
unknown committed
3737 3738 3739
	/* Flush the log to reduce probability that the .frm files and
	the InnoDB data dictionary get out-of-sync if the user runs
	with innodb_flush_log_at_trx_commit = 0 */
unknown's avatar
unknown committed
3740

unknown's avatar
unknown committed
3741
	log_buffer_flush_to_disk();
unknown's avatar
Merge  
unknown committed
3742

3743 3744 3745 3746 3747
	/* Tell the InnoDB server that there might be work for
	utility threads: */

	srv_active_wake_master_thread();

unknown's avatar
unknown committed
3748
  	innobase_commit_low(trx);
3749 3750
  	trx_free_for_mysql(trx);

unknown's avatar
unknown committed
3751
	error = convert_error_code_to_mysql(error, NULL);
3752 3753 3754 3755

	return(error);
}

3756
/*************************************************************************
3757
Renames an InnoDB table. */
3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768

int
ha_innobase::rename_table(
/*======================*/
				/* out: 0 or error code */
	const char*	from,	/* in: old name of the table */
	const char*	to)	/* in: new name of the table */
{
	ulint	name_len1;
	ulint	name_len2;
	int	error;
unknown's avatar
unknown committed
3769
	trx_t*	parent_trx;
3770
	trx_t*	trx;
3771 3772
	char	norm_from[1000];
	char	norm_to[1000];
3773

3774 3775
  	DBUG_ENTER("ha_innobase::rename_table");

unknown's avatar
unknown committed
3776 3777 3778 3779 3780 3781 3782 3783 3784 3785
	/* Get the transaction associated with the current thd, or create one
	if not yet created */
	
	parent_trx = check_trx_exists(current_thd);

	/* In case MySQL calls this in the middle of a SELECT query, release
	possible adaptive hash latch to avoid deadlocks of threads */

	trx_search_latch_release_if_reserved(parent_trx);	

unknown's avatar
unknown committed
3786 3787 3788 3789 3790 3791
	if (lower_case_table_names) {
		srv_lower_case_table_names = TRUE;
	} else {
		srv_lower_case_table_names = FALSE;
	}

3792
	trx = trx_allocate_for_mysql();
unknown's avatar
unknown committed
3793 3794
	trx->mysql_thd = current_thd;
	trx->mysql_query_str = &((*current_thd).query);
3795 3796 3797 3798 3799 3800

	name_len1 = strlen(from);
	name_len2 = strlen(to);

	assert(name_len1 < 1000);
	assert(name_len2 < 1000);
3801

3802 3803 3804
	normalize_table_name(norm_from, from);
	normalize_table_name(norm_to, to);

3805
  	/* Rename the table in InnoDB */
3806

3807
  	error = row_rename_table_for_mysql(norm_from, norm_to, trx);
3808

unknown's avatar
Merge  
unknown committed
3809 3810 3811
	/* Flush the log to reduce probability that the .frm files and
	the InnoDB data dictionary get out-of-sync if the user runs
	with innodb_flush_log_at_trx_commit = 0 */
unknown's avatar
unknown committed
3812

unknown's avatar
unknown committed
3813
	log_buffer_flush_to_disk();
unknown's avatar
Merge  
unknown committed
3814

3815
	/* Tell the InnoDB server that there might be work for
3816 3817 3818 3819
	utility threads: */

	srv_active_wake_master_thread();

unknown's avatar
unknown committed
3820
  	innobase_commit_low(trx);
3821 3822
  	trx_free_for_mysql(trx);

unknown's avatar
unknown committed
3823
	error = convert_error_code_to_mysql(error, NULL);
3824 3825 3826 3827 3828 3829 3830 3831 3832 3833

	DBUG_RETURN(error);
}

/*************************************************************************
Estimates the number of index records in a range. Both end points are
converted to InnoDB tuples and handed to btr_estimate_n_rows_in_range().
As a side effect the active index of the handle is set to keynr. */

ha_rows
ha_innobase::records_in_range(
/*==========================*/
						/* out: estimated number of
						rows, never 0 */
	int 			keynr,		/* in: index number */
	const mysql_byte*	start_key,	/* in: start key value of the
						range, may also be empty */
	uint 			start_key_len,	/* in: start key val len, may
						also be 0 */
	enum ha_rkey_function 	start_search_flag,/* in: start search condition
						e.g., 'greater than' */
	const mysql_byte*	end_key,	/* in: range end key val, may
						also be empty */
	uint 			end_key_len,	/* in: range end key val len,
						may also be 0 */
	enum ha_rkey_function 	end_search_flag)/* in: range end search cond */
{
	row_prebuilt_t* prebuilt	= (row_prebuilt_t*) innobase_prebuilt;
	ulint		end_buf_len	= table->reclength
      						+ table->max_key_length + 100;
	/* Scratch buffer for the converted end key; the start key uses
	the key_val_buff of the handle.
	NOTE(review): the allocation result is not checked before use. */
	mysql_byte*	end_key_buf	= (mysql_byte*) my_malloc(
						end_buf_len, MYF(MY_WME));
	KEY*		key_def;
	dict_index_t*	ib_index;
	dtuple_t*	start_tuple;
	dtuple_t*	end_tuple;
	void*           start_heap;
	void*           end_heap;
	ulint		start_mode;
	ulint		end_mode;
	ib_longlong	estimate;

   	DBUG_ENTER("records_in_range");

	/* We do not know if MySQL can call this function before calling
	external_lock(). To be safe, update the thd of the current table
	handle. */

	update_thd(current_thd);

	prebuilt->trx->op_info = (char*)"estimating records in index range";

	/* In case MySQL calls this in the middle of a SELECT query, release
	possible adaptive hash latch to avoid deadlocks of threads */

	trx_search_latch_release_if_reserved(prebuilt->trx);

	/* Look up the InnoDB index by the name of the MySQL key */

	active_index = keynr;

	key_def = table->key_info + active_index;

	ib_index = dict_table_get_index_noninline(prebuilt->table,
							key_def->name);

	/* One tuple per end point, each allocated from a heap of its
	own and typed after the index */

	start_tuple = dtuple_create_for_mysql(&start_heap,
							key_def->key_parts);
 	dict_index_copy_types(start_tuple, ib_index, key_def->key_parts);

	end_tuple = dtuple_create_for_mysql(&end_heap, key_def->key_parts);
 	dict_index_copy_types(end_tuple, ib_index, key_def->key_parts);

	/* Convert both MySQL keys to the InnoDB format */

	row_sel_convert_mysql_key_to_innobase(
				start_tuple, (byte*) key_val_buff,
				(ulint)upd_and_key_val_buff_len,
				ib_index,
				(byte*) start_key,
				(ulint) start_key_len);

	row_sel_convert_mysql_key_to_innobase(
				end_tuple, (byte*) end_key_buf,
				end_buf_len, ib_index,
				(byte*) end_key,
				(ulint) end_key_len);

	start_mode = convert_search_mode_to_innobase(start_search_flag);
	end_mode = convert_search_mode_to_innobase(end_search_flag);

	estimate = btr_estimate_n_rows_in_range(ib_index, start_tuple,
					start_mode, end_tuple, end_mode);

	/* Release the tuples and the scratch buffer */

	dtuple_free_for_mysql(start_heap);
	dtuple_free_for_mysql(end_heap);

    	my_free((char*) end_key_buf, MYF(0));

	prebuilt->trx->op_info = (char*)"";

	/* The MySQL optimizer seems to believe an estimate of 0 rows is
	always accurate and may return the result 'Empty set' based on that.
	The accuracy is not guaranteed, and even if it were, for a locking
	read we should anyway perform the search to set the next-key lock.
	Report 1 instead of 0 to make sure MySQL does not make the
	assumption! */

	if (estimate == 0) {
	        estimate = 1;
	}

	DBUG_RETURN((ha_rows) estimate);
}

3931 3932
/*************************************************************************
Gives an UPPER BOUND to the number of rows in a table. This is used in
unknown's avatar
unknown committed
3933
filesort.cc. */
3934 3935 3936 3937

ha_rows
ha_innobase::estimate_number_of_rows(void)
/*======================================*/
3938
			/* out: upper bound of rows */
3939 3940
{
	row_prebuilt_t* prebuilt	= (row_prebuilt_t*) innobase_prebuilt;
3941 3942
	dict_index_t*	index;
	ulonglong	estimate;
3943
	ulonglong	local_data_file_length;
unknown's avatar
unknown committed
3944

unknown's avatar
unknown committed
3945
 	DBUG_ENTER("estimate_number_of_rows");
3946

unknown's avatar
unknown committed
3947 3948 3949 3950 3951 3952
	/* We do not know if MySQL can call this function before calling
	external_lock(). To be safe, update the thd of the current table
	handle. */

	update_thd(current_thd);

unknown's avatar
unknown committed
3953 3954 3955
	prebuilt->trx->op_info = (char*)
	                         "calculating upper bound for table rows";

unknown's avatar
unknown committed
3956 3957 3958 3959 3960
	/* In case MySQL calls this in the middle of a SELECT query, release
	possible adaptive hash latch to avoid deadlocks of threads */

	trx_search_latch_release_if_reserved(prebuilt->trx);

3961
	index = dict_table_get_first_index_noninline(prebuilt->table);
unknown's avatar
unknown committed
3962

3963
	local_data_file_length = ((ulonglong) index->stat_n_leaf_pages)
3964
    							* UNIV_PAGE_SIZE;
3965

unknown's avatar
unknown committed
3966 3967
	/* Calculate a minimum length for a clustered index record and from
	that an upper bound for the number of rows. Since we only calculate
unknown's avatar
unknown committed
3968 3969
	new statistics in row0mysql.c when a table has grown by a threshold
	factor, we must add a safety factor 2 in front of the formula below. */
unknown's avatar
unknown committed
3970

unknown's avatar
unknown committed
3971 3972
	estimate = 2 * local_data_file_length /
					 dict_index_calc_min_rec_len(index);
unknown's avatar
unknown committed
3973

unknown's avatar
unknown committed
3974 3975
	prebuilt->trx->op_info = (char*)"";

unknown's avatar
unknown committed
3976
	DBUG_RETURN((ha_rows) estimate);
3977 3978
}

3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990
/*************************************************************************
How many seeks it will take to read through the table. This is to be
comparable to the number returned by records_in_range so that we can
decide if we should scan the table or use keys. */

double
ha_innobase::scan_time()
/*====================*/
			/* out: estimated time measured in disk seeks */
{
	row_prebuilt_t* prebuilt	= (row_prebuilt_t*) innobase_prebuilt;

unknown's avatar
unknown committed
3991 3992 3993 3994 3995 3996
	/* Since MySQL seems to favor table scans too much over index
	searches, we pretend that a sequential read takes the same time
	as a random disk read, that is, we do not divide the following
	by 10, which would be physically realistic. */
	
	return((double) (prebuilt->table->stat_clustered_index_size));
3997 3998
}

unknown's avatar
unknown committed
3999 4000 4001
/**********************************************************************
Calculate the time it takes to read a set of ranges through an index
This enables us to optimise reads for clustered indexes. */
unknown's avatar
unknown committed
4002

unknown's avatar
unknown committed
4003 4004 4005 4006 4007 4008 4009
double
ha_innobase::read_time(
/*===================*/
			/* out: estimated time measured in disk seeks */
	uint    index,	/* in: key number */
	uint	ranges,	/* in: how many ranges */
	ha_rows rows)	/* in: estimated number of rows in the ranges */
unknown's avatar
unknown committed
4010
{
unknown's avatar
unknown committed
4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028
	ha_rows total_rows;
	double  time_for_scan;
  
	if (index != table->primary_key)
	  return handler::read_time(index, ranges, rows); // Not clustered

	if (rows <= 2)
	  return (double) rows;

	/* Assume that the read time is proportional to the scan time for all
	rows + at most one seek per range. */

	time_for_scan= scan_time();

	if ((total_rows= estimate_number_of_rows()) < rows)
	  return time_for_scan;

	return (ranges + (double) rows / (double) total_rows * time_for_scan);
unknown's avatar
unknown committed
4029 4030
}

4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042
/*************************************************************************
Returns statistics information of the table to the MySQL interpreter,
in various fields of the handle object. Which fields are filled in
depends on the bits set in flag:
  HA_STATUS_TIME	the InnoDB statistics of the table are recalculated
  HA_STATUS_VARIABLE	records, deleted, data_file_length,
			index_file_length, delete_length, check_time and
			mean_rec_length are set
  HA_STATUS_CONST	rec_per_key of every key part in table->key_info
			is set
  HA_STATUS_ERRKEY	errkey is set from the error info of the
			transaction
Nothing is done if srv_force_recovery is at SRV_FORCE_NO_IBUF_MERGE or
higher. */

void
ha_innobase::info(
/*==============*/
	uint flag)	/* in: what information MySQL requests */
{
	row_prebuilt_t* prebuilt	= (row_prebuilt_t*) innobase_prebuilt;
	dict_table_t*	ib_table;
	dict_index_t*	index;
	ha_rows		rec_per_key;
	ulong		j;
	ulong		i;

 	DBUG_ENTER("info");

        /* If we are forcing recovery at a high level, we will suppress
	statistics calculation on tables, because that may crash the
	server if an index is badly corrupted. */

        if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE) {

		/* Leave through the DBUG macro so that the DBUG_ENTER
		above is balanced; a bare return would not be */

                DBUG_VOID_RETURN;
        }

	/* We do not know if MySQL can call this function before calling
	external_lock(). To be safe, update the thd of the current table
	handle. */

	update_thd(current_thd);

	prebuilt->trx->op_info = (char*)"returning various info to MySQL";

	/* In case MySQL calls this in the middle of a SELECT query, release
	possible adaptive hash latch to avoid deadlocks of threads */

	trx_search_latch_release_if_reserved(prebuilt->trx);

 	ib_table = prebuilt->table;

 	if (flag & HA_STATUS_TIME) {
 		/* In sql_show we call with this flag: update then statistics
 		so that they are up-to-date */

	        prebuilt->trx->op_info = (char*)"updating table statistics";

 		dict_update_statistics(ib_table);

		prebuilt->trx->op_info = (char*)
		                          "returning various info to MySQL";
 	}

	if (flag & HA_STATUS_VARIABLE) {
    		records = (ha_rows)ib_table->stat_n_rows;
    		deleted = 0;
    		data_file_length = ((ulonglong)
				ib_table->stat_clustered_index_size)
    					* UNIV_PAGE_SIZE;
    		index_file_length = ((ulonglong)
				ib_table->stat_sum_of_other_index_sizes)
    					* UNIV_PAGE_SIZE;
    		delete_length = 0;
    		check_time = 0;

		/* Avoid dividing by zero for a table with no rows */

    		if (records == 0) {
    			mean_rec_length = 0;
    		} else {
    			mean_rec_length = (ulong) (data_file_length / records);
    		}
    	}

	if (flag & HA_STATUS_CONST) {
		index = dict_table_get_first_index_noninline(ib_table);

		/* A clustered index generated by InnoDB is not a MySQL
		key: step over it */

		if (prebuilt->clust_index_was_generated) {
			index = dict_table_get_next_index_noninline(index);
		}

		/* Walk the MySQL keys and the InnoDB indexes in parallel */

		for (i = 0; i < table->keys; i++) {
			if (index == NULL) {
				ut_print_timestamp(stderr);
			        fprintf(stderr,
"  InnoDB: Error: table %s contains less indexes inside InnoDB\n"
"InnoDB: than are defined in the MySQL .frm file. Have you mixed up\n"
"InnoDB: .frm files from different installations? See section\n"
"InnoDB: 15.1 at http://www.innodb.com/ibman.html\n",
				   ib_table->name);
				break;
			}

			for (j = 0; j < table->key_info[i].key_parts; j++) {

				if (j + 1 > index->n_uniq) {
				        ut_print_timestamp(stderr);
			                fprintf(stderr,
"  InnoDB: Error: index %s of %s has %lu columns unique inside InnoDB\n"
"InnoDB: but MySQL is asking statistics for %lu columns. Have you mixed up\n"
"InnoDB: .frm files from different installations? See section\n"
"InnoDB: 15.1 at http://www.innodb.com/ibman.html\n",
						index->name,
						ib_table->name, index->n_uniq,
						j + 1);

					/* Gives up on the remaining key
					parts of this key only */

				        break;
				}

				/* Rows per distinct value of the first
				j + 1 key parts; a zero count of distinct
				values is treated as 'all rows are equal' */

				if (index->stat_n_diff_key_vals[j + 1] == 0) {

					rec_per_key = records;
				} else {
					rec_per_key = (ha_rows)(records /
   				         index->stat_n_diff_key_vals[j + 1]);
				}

				/* Since MySQL seems to favor table scans
				too much over index searches, we pretend
				index selectivity is 2 times better than
				our estimate: */

				rec_per_key = rec_per_key / 2;

				if (rec_per_key == 0) {
					rec_per_key = 1;
				}

				/* Clamp to the largest value an ulong
				can hold */

 				table->key_info[i].rec_per_key[j]=
				  rec_per_key >= ~(ulong) 0 ? ~(ulong) 0 :
				  rec_per_key;
			}

			index = dict_table_get_next_index_noninline(index);
		}
	}

  	if (flag & HA_STATUS_ERRKEY) {
		ut_a(prebuilt->trx && prebuilt->trx->magic_n == TRX_MAGIC_N);

		errkey = (unsigned int) row_get_mysql_key_number_for_index(
				       (dict_index_t*)
				       trx_get_error_info(prebuilt->trx));
  	}

	prebuilt->trx->op_info = (char*)"";

  	DBUG_VOID_RETURN;
}

unknown's avatar
unknown committed
4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195
/**************************************************************************
Updates index cardinalities of the table, based on 10 random dives into
each index tree. This does NOT calculate exact statistics of the table.
The work is delegated to ::info(); neither parameter is used here. */

int
ha_innobase::analyze(
/*=================*/
					/* out: returns always 0 (success) */
	THD*		thd,		/* in: connection thread handle */
	HA_CHECK_OPT*	check_opt)	/* in: currently ignored */
{
	/* Ask ::info() for everything except the duplicate key number */
	const uint	stat_flags = HA_STATUS_TIME
				     | HA_STATUS_CONST
				     | HA_STATUS_VARIABLE;

	info(stat_flags);

	return(0);
}

4196 4197 4198 4199 4200 4201

/* OPTIMIZE TABLE entry point. It only delegates to ::analyze() and hands
back whatever ::analyze() returned. */

int
ha_innobase::optimize(
/*==================*/
					/* out: return value of ::analyze() */
	THD*		thd,		/* in: connection thread handle */
	HA_CHECK_OPT*	check_opt)	/* in: passed on to ::analyze() */
{
	int	result = ha_innobase::analyze(thd, check_opt);

	return(result);
}

unknown's avatar
unknown committed
4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217
/***********************************************************************
Runs the InnoDB consistency check on the table of this handle. If
corruption is noticed, information about it is printed to stderr; in case
of corruption the check may also assert a failure and crash the server. */

int
ha_innobase::check(
/*===============*/
					/* out: HA_ADMIN_CORRUPT or
					HA_ADMIN_OK */
	THD* 		thd,		/* in: user thread handle */
	HA_CHECK_OPT* 	check_opt)	/* in: check options, currently
					ignored */
{
	row_prebuilt_t*	pb	= (row_prebuilt_t*) innobase_prebuilt;
	trx_t*		trx	= pb->trx;

	/* The handle must carry a valid transaction object and it must be
	the transaction of the calling thread */

	ut_a(trx && trx->magic_n == TRX_MAGIC_N);
	ut_a(trx == (trx_t*) current_thd->transaction.all.innobase_tid);

	if (!pb->mysql_template) {
		/* No template yet: build a dummy whole-row template for
		the index scans done in checking */

		build_template(pb, NULL, table, ROW_MYSQL_WHOLE_ROW);
	}

	if (row_check_table_for_mysql(pb) != DB_SUCCESS) {

		return(HA_ADMIN_CORRUPT);
	}

	return(HA_ADMIN_OK);
}

4239
/*****************************************************************
unknown's avatar
Merge  
unknown committed
4240 4241 4242
Adds information about free space in the InnoDB tablespace to a table comment
which is printed out when a user calls SHOW TABLE STATUS. Adds also info on
foreign keys. */
4243 4244 4245 4246

char*
ha_innobase::update_table_comment(
/*==============================*/
unknown's avatar
Merge  
unknown committed
4247 4248 4249
				/* out: table comment + InnoDB free space +
				info on foreign keys */
        const char*	comment)/* in: table comment defined by user */
4250
{
4251 4252 4253
	uint	length			= strlen(comment);
	char*				str;
	row_prebuilt_t*	prebuilt	= (row_prebuilt_t*)innobase_prebuilt;
4254

unknown's avatar
unknown committed
4255 4256 4257 4258
	/* We do not know if MySQL can call this function before calling
	external_lock(). To be safe, update the thd of the current table
	handle. */

4259 4260 4261 4262
	if(length > 64000 - 3) {
		return((char*)comment); /* string too long */
	}

unknown's avatar
unknown committed
4263 4264
	update_thd(current_thd);

unknown's avatar
unknown committed
4265 4266
	prebuilt->trx->op_info = (char*)"returning table comment";

unknown's avatar
unknown committed
4267 4268 4269 4270
	/* In case MySQL calls this in the middle of a SELECT query, release
	possible adaptive hash latch to avoid deadlocks of threads */

	trx_search_latch_release_if_reserved(prebuilt->trx);
4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283
	str = NULL;

	if (FILE* file = tmpfile()) {
		long	flen;

		/* output the data to a temporary file */
		fprintf(file, "InnoDB free: %lu kB",
			   (ulong) innobase_get_free_space());
		dict_print_info_on_foreign_keys(FALSE, file, prebuilt->table);
		flen = ftell(file);
		if(length + flen + 3 > 64000) {
			flen = 64000 - 3 - length;
		}
unknown's avatar
unknown committed
4284

4285
		ut_ad(flen > 0);
unknown's avatar
Merge  
unknown committed
4286

4287 4288
		/* allocate buffer for the full string, and
		read the contents of the temporary file */
4289

4290
		str = my_malloc(length + flen + 3, MYF(0));
4291

4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304
		if (str) {
			char* pos	= str + length;
			if(length) {
				memcpy(str, comment, length);
				*pos++ = ';';
				*pos++ = ' ';
			}
			rewind(file);
			flen = fread(pos, 1, flen, file);
			pos[flen] = 0;
		}

		fclose(file);
unknown's avatar
unknown committed
4305
	}
unknown's avatar
unknown committed
4306

unknown's avatar
unknown committed
4307 4308
        prebuilt->trx->op_info = (char*)"";

4309
  	return(str ? str : (char*) comment);
4310 4311
}

unknown's avatar
unknown committed
4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322
/***********************************************************************
Gets the foreign key create info for a table stored in InnoDB. */

char*
ha_innobase::get_foreign_key_create_info(void)
/*==========================================*/
			/* out, own: character string in the form which
			can be inserted to the CREATE TABLE statement,
			MUST be freed with ::free_foreign_key_create_info */
{
	row_prebuilt_t* prebuilt = (row_prebuilt_t*)innobase_prebuilt;
4323
	char*	str	= 0;
unknown's avatar
unknown committed
4324

unknown's avatar
unknown committed
4325 4326
	ut_a(prebuilt != NULL);

unknown's avatar
unknown committed
4327 4328 4329 4330 4331 4332
	/* We do not know if MySQL can call this function before calling
	external_lock(). To be safe, update the thd of the current table
	handle. */

	update_thd(current_thd);

4333 4334
	if (FILE* file = tmpfile()) {
		long	flen;
unknown's avatar
unknown committed
4335

4336
		prebuilt->trx->op_info = (char*)"getting info on foreign keys";
unknown's avatar
unknown committed
4337

4338 4339 4340
		/* In case MySQL calls this in the middle of a SELECT query,
		release possible adaptive hash latch to avoid
		deadlocks of threads */
4341

4342
		trx_search_latch_release_if_reserved(prebuilt->trx);
unknown's avatar
unknown committed
4343

4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371
		/* output the data to a temporary file */
		dict_print_info_on_foreign_keys(TRUE, file, prebuilt->table);
		prebuilt->trx->op_info = (char*)"";

		flen = ftell(file);
		if(flen > 64000 - 1) {
			flen = 64000 - 1;
		}

		ut_ad(flen >= 0);

		/* allocate buffer for the string, and
		read the contents of the temporary file */

		str = my_malloc(flen + 1, MYF(0));

		if (str) {
			rewind(file);
			flen = fread(str, 1, flen, file);
			str[flen] = 0;
		}

		fclose(file);
	} else {
		/* unable to create temporary file */
		str = my_malloc(1, MYF(0));
		str[0] = 0;
	}
unknown's avatar
unknown committed
4372

unknown's avatar
Merge  
unknown committed
4373
  	return(str);
unknown's avatar
unknown committed
4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395
}

/***********************************************************************
Tells whether the table of this handle is referenced by a foreign key. The
MySQL manual states that a REPLACE is either equivalent to an INSERT, or
DELETE(s) + INSERT. Only a delete is then allowed internally to resolve a
duplicate key conflict in REPLACE, not an update. */

uint
ha_innobase::referenced_by_foreign_key(void)
/*========================================*/
			/* out: > 0 if referenced by a FOREIGN KEY */
{
	row_prebuilt_t*	pb = (row_prebuilt_t*) innobase_prebuilt;

	/* Collapse the dictionary answer to exactly 1 or 0 */
	uint	is_referenced =
		dict_table_referenced_by_foreign_key(pb->table) ? 1 : 0;

	return(is_referenced);
}
unknown's avatar
unknown committed
4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406

/***********************************************************************
Frees the foreign key create info for a table stored in InnoDB, if it is
non-NULL. */

void
ha_innobase::free_foreign_key_create_info(
/*======================================*/
	char*	str)	/* in, own: create info string to free  */
{
	if (str) {
4407
		my_free(str, MYF(0));
unknown's avatar
unknown committed
4408
	}
4409 4410
}

unknown's avatar
unknown committed
4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430
/***********************************************************************
Tells something additional to the handler about how to do things. */

int
ha_innobase::extra(
/*===============*/
			   /* out: 0 or error number */
	enum ha_extra_function operation)
                           /* in: HA_EXTRA_DONT_USE_CURSOR_TO_UPDATE */
{
	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;

	/* Warning: since it is not sure that MySQL calls external_lock
	before calling this function, the trx field in prebuilt can be
	obsolete! */

	switch (operation) {
 		case HA_EXTRA_RESET:
  		case HA_EXTRA_RESET_STATE:
	        	prebuilt->read_just_key = 0;
unknown's avatar
unknown committed
4431
    	        	break;
unknown's avatar
unknown committed
4432 4433 4434 4435
		case HA_EXTRA_NO_KEYREAD:
    			prebuilt->read_just_key = 0;
    			break;
	        case HA_EXTRA_DONT_USE_CURSOR_TO_UPDATE:
unknown's avatar
unknown committed
4436
			prebuilt->hint_no_need_to_fetch_extra_cols = FALSE;
unknown's avatar
unknown committed
4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457
			break;
	        case HA_EXTRA_KEYREAD:
	        	prebuilt->read_just_key = 1;
	        	break;
		default:/* Do nothing */
			;
	}

	return(0);
}

/**********************************************************************
Handler reset hook. This implementation performs no work: it touches no
state of the handle and unconditionally reports success. */

int
ha_innobase::reset(void)
/*====================*/
			/* out: always 0 */
{
  	return(0);
}

unknown's avatar
unknown committed
4458
/**********************************************************************
unknown's avatar
unknown committed
4459 4460 4461 4462 4463
MySQL calls this function at the start of each SQL statement inside LOCK
TABLES. Inside LOCK TABLES the ::external_lock method does not work to
mark SQL statement borders. Note also a special case: if a temporary table
is created inside LOCK TABLES, MySQL has not called external_lock() at all
on that table. */
unknown's avatar
unknown committed
4464 4465

int
unknown's avatar
unknown committed
4466 4467
ha_innobase::start_stmt(
/*====================*/
unknown's avatar
unknown committed
4468 4469 4470 4471 4472 4473 4474 4475 4476 4477
	              /* out: 0 or error code */
	THD*    thd)  /* in: handle to the user thread */
{
	row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt;
	trx_t*		trx;

	update_thd(thd);

	trx = prebuilt->trx;

unknown's avatar
unknown committed
4478 4479 4480 4481 4482 4483 4484
	/* Here we release the search latch and the InnoDB thread FIFO ticket
	if they were reserved. They should have been released already at the
	end of the previous statement, but because inside LOCK TABLES the
	lock count method does not work to mark the end of a SELECT statement,
	that may not be the case. We MUST release the search latch before an
	INSERT, for example. */

unknown's avatar
unknown committed
4485 4486
	innobase_release_stat_resources(trx);

unknown's avatar
unknown committed
4487 4488 4489 4490 4491 4492 4493 4494
	if (trx->isolation_level <= TRX_ISO_READ_COMMITTED
	    						&& trx->read_view) {
	    	/* At low transaction isolation levels we let
		each consistent read set its own snapshot */

	    	read_view_close_for_mysql(trx);
	}

unknown's avatar
unknown committed
4495 4496 4497 4498
	auto_inc_counter_for_this_stat = 0;
	prebuilt->sql_stat_start = TRUE;
	prebuilt->hint_no_need_to_fetch_extra_cols = TRUE;
	prebuilt->read_just_key = 0;
unknown's avatar
unknown committed
4499

4500
	if (!prebuilt->mysql_has_locked) {
unknown's avatar
unknown committed
4501 4502 4503 4504 4505 4506
	        /* This handle is for a temporary table created inside
	        this same LOCK TABLES; since MySQL does NOT call external_lock
	        in this case, we must use x-row locks inside InnoDB to be
	        prepared for an update of a row */
	  
	        prebuilt->select_lock_type = LOCK_X;
4507
	} else {
4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539
		/* When we first come here after LOCK TABLES,
		select_lock_type is set to LOCK_S or LOCK_X. Store the value
		in case we run also consistent reads and need to restore the
		value later. */

		if (prebuilt->select_lock_type != LOCK_NONE) {
			prebuilt->stored_select_lock_type =
					prebuilt->select_lock_type;
		}

		if (prebuilt->stored_select_lock_type != LOCK_S
		    && prebuilt->stored_select_lock_type != LOCK_X) {
			fprintf(stderr,
"InnoDB: Error: select_lock_type is %lu inside ::start_stmt()!\n",
			prebuilt->stored_select_lock_type);

			ut_error;
		}

		if (thd->lex.sql_command == SQLCOM_SELECT
					&& thd->lex.lock_option == TL_READ) {
	
			/* For other than temporary tables, we obtain
			no lock for consistent read (plain SELECT) */

			prebuilt->select_lock_type = LOCK_NONE;
		} else {
			/* Not a consistent read: restore the
			select_lock_type value */
			prebuilt->select_lock_type =
				prebuilt->stored_select_lock_type;
		}
unknown's avatar
unknown committed
4540
	}
unknown's avatar
unknown committed
4541 4542
	
	/* Set the MySQL flag to mark that there is an active transaction */
unknown's avatar
unknown committed
4543
	thd->transaction.all.innodb_active_trans = 1;
unknown's avatar
unknown committed
4544 4545

	return(0);
unknown's avatar
unknown committed
4546 4547
}

unknown's avatar
unknown committed
4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558
/**********************************************************************
Translates a MySQL transaction isolation level code to the corresponding
InnoDB isolation level code. An unknown MySQL code is an assertion
failure. */
inline
ulint
innobase_map_isolation_level(
/*=========================*/
					/* out: InnoDB isolation level */
	enum_tx_isolation	iso)	/* in: MySQL isolation level code */
{
	ulint	innodb_level = 0;

	if (iso == ISO_REPEATABLE_READ) {
		innodb_level = TRX_ISO_REPEATABLE_READ;
	} else if (iso == ISO_READ_COMMITTED) {
		innodb_level = TRX_ISO_READ_COMMITTED;
	} else if (iso == ISO_SERIALIZABLE) {
		innodb_level = TRX_ISO_SERIALIZABLE;
	} else if (iso == ISO_READ_UNCOMMITTED) {
		innodb_level = TRX_ISO_READ_UNCOMMITTED;
	} else {
		/* Not a known MySQL isolation level */
		ut_a(0);
	}

	return(innodb_level);
}
	
unknown's avatar
unknown committed
4566 4567
/**********************************************************************
As MySQL will execute an external lock for every new table it uses when it
unknown's avatar
unknown committed
4568 4569 4570
starts to process an SQL statement (an exception is when MySQL calls
start_stmt for the handle) we can use this function to store the pointer to
the THD in the handle. We will also use this function to communicate
unknown's avatar
unknown committed
4571 4572 4573 4574 4575 4576 4577
to InnoDB that a new SQL statement has started and that we must store a
savepoint to our transaction handle, so that we are able to roll back
the SQL statement in case of an error. */

int
ha_innobase::external_lock(
/*=======================*/
4578
			        /* out: 0 */
unknown's avatar
unknown committed
4579 4580 4581 4582 4583 4584 4585
	THD*	thd,		/* in: handle to the user thread */
	int 	lock_type)	/* in: lock type */
{
	row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt;
	trx_t*		trx;

  	DBUG_ENTER("ha_innobase::external_lock");
unknown's avatar
unknown committed
4586
	DBUG_PRINT("enter",("lock_type: %d", lock_type));
unknown's avatar
unknown committed
4587 4588 4589 4590 4591 4592

	update_thd(thd);

	trx = prebuilt->trx;

	prebuilt->sql_stat_start = TRUE;
unknown's avatar
unknown committed
4593
	prebuilt->hint_no_need_to_fetch_extra_cols = TRUE;
unknown's avatar
unknown committed
4594 4595 4596 4597 4598 4599 4600 4601 4602 4603 4604

	prebuilt->read_just_key = 0;

	if (lock_type == F_WRLCK) {

		/* If this is a SELECT, then it is in UPDATE TABLE ...
		or SELECT ... FOR UPDATE */
		prebuilt->select_lock_type = LOCK_X;
	}

	if (lock_type != F_UNLCK) {
unknown's avatar
unknown committed
4605
		/* MySQL is setting a new table lock */
unknown's avatar
unknown committed
4606

unknown's avatar
unknown committed
4607 4608
		/* Set the MySQL flag to mark that there is an active
		transaction */
unknown's avatar
unknown committed
4609
		thd->transaction.all.innodb_active_trans = 1;
unknown's avatar
unknown committed
4610

unknown's avatar
unknown committed
4611
		trx->n_mysql_tables_in_use++;
4612
		prebuilt->mysql_has_locked = TRUE;
unknown's avatar
unknown committed
4613

unknown's avatar
unknown committed
4614 4615
		if (trx->n_mysql_tables_in_use == 1) {
		        trx->isolation_level = innobase_map_isolation_level(
unknown's avatar
unknown committed
4616 4617
						(enum_tx_isolation)
						thd->variables.tx_isolation);
unknown's avatar
unknown committed
4618
		}
unknown's avatar
unknown committed
4619 4620

		if (trx->isolation_level == TRX_ISO_SERIALIZABLE
unknown's avatar
unknown committed
4621 4622 4623
		    && prebuilt->select_lock_type == LOCK_NONE
		    && (thd->options
				 & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) {
unknown's avatar
unknown committed
4624

unknown's avatar
unknown committed
4625
		    	/* To get serializable execution, we let InnoDB
unknown's avatar
unknown committed
4626
		    	conceptually add 'LOCK IN SHARE MODE' to all SELECTs
unknown's avatar
unknown committed
4627 4628 4629 4630 4631
			which otherwise would have been consistent reads. An
			exception is consistent reads in the AUTOCOMMIT=1 mode:
			we know that they are read-only transactions, and they
			can be serialized also if performed as consistent
			reads. */
unknown's avatar
unknown committed
4632 4633 4634 4635

			prebuilt->select_lock_type = LOCK_S;
		}

unknown's avatar
unknown committed
4636
		if (prebuilt->select_lock_type != LOCK_NONE) {
4637 4638 4639 4640 4641 4642 4643 4644 4645 4646
			if (thd->in_lock_tables) {
				ulint	error;
				error = row_lock_table_for_mysql(prebuilt);

				if (error != DB_SUCCESS) {
					error = convert_error_code_to_mysql(
						error, user_thd);
					DBUG_RETURN(error);
				}
			}
unknown's avatar
unknown committed
4647 4648 4649 4650

		  	trx->mysql_n_tables_locked++;
		}

4651
		DBUG_RETURN(0);
unknown's avatar
unknown committed
4652
	}
unknown's avatar
unknown committed
4653

unknown's avatar
unknown committed
4654
	/* MySQL is releasing a table lock */
unknown's avatar
unknown committed
4655

unknown's avatar
unknown committed
4656 4657 4658
	trx->n_mysql_tables_in_use--;
	prebuilt->mysql_has_locked = FALSE;
	auto_inc_counter_for_this_stat = 0;
4659 4660 4661
	if (trx->n_tables_locked) {
		row_unlock_table_for_mysql(trx);
	}
4662

unknown's avatar
unknown committed
4663 4664
	/* If the MySQL lock count drops to zero we know that the current SQL
	statement has ended */
unknown's avatar
unknown committed
4665

unknown's avatar
unknown committed
4666
	if (trx->n_mysql_tables_in_use == 0) {
unknown's avatar
unknown committed
4667

unknown's avatar
unknown committed
4668 4669 4670
	        trx->mysql_n_tables_locked = 0;
		prebuilt->used_in_HANDLER = FALSE;
			
unknown's avatar
unknown committed
4671 4672 4673 4674 4675 4676
		/* Release a possible FIFO ticket and search latch. Since we
		may reserve the kernel mutex, we have to release the search
		system latch first to obey the latching order. */

	        innobase_release_stat_resources(trx);

unknown's avatar
unknown committed
4677 4678 4679 4680 4681 4682
		if (!(thd->options
				 & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) {
			if (thd->transaction.all.innodb_active_trans != 0) {
		    	        innobase_commit(thd, trx);
			}
		} else {
unknown's avatar
unknown committed
4683 4684 4685
			if (trx->isolation_level <= TRX_ISO_READ_COMMITTED
	    						&& trx->read_view) {

unknown's avatar
unknown committed
4686
				/* At low transaction isolation levels we let
unknown's avatar
unknown committed
4687 4688
				each consistent read set its own snapshot */

unknown's avatar
unknown committed
4689
				read_view_close_for_mysql(trx);
unknown's avatar
unknown committed
4690
			}
unknown's avatar
unknown committed
4691 4692 4693
		}
	}

4694
	DBUG_RETURN(0);
unknown's avatar
unknown committed
4695 4696
}

unknown's avatar
unknown committed
4697 4698 4699 4700 4701 4702 4703 4704 4705 4706
/****************************************************************************
Implements the SHOW INNODB STATUS command. Send the output of the InnoDB
Monitor to the client. The monitor text is printed to srv_monitor_file under
srv_monitor_file_mutex and read back into a temporary buffer; at most
64000 - 1 bytes of it are sent. */

int
innodb_show_status(
/*===============*/
			/* out: 0 if the status was sent; -1 if InnoDB is
			skipped, the buffer could not be allocated, or
			sending to the client failed */
	THD*	thd)	/* in: the MySQL query thread of the caller */
{
	String* 	packet 	= &thd->packet;
	trx_t*		trx;

  	DBUG_ENTER("innodb_show_status");

	if (innodb_skip) {
	        my_message(ER_NOT_SUPPORTED_YET,
	  "Cannot call SHOW INNODB STATUS because skip-innodb is defined",
			   MYF(0));
                DBUG_RETURN(-1);
        }

	trx = check_trx_exists(thd);

	innobase_release_stat_resources(trx);

	/* We let the InnoDB Monitor to output at most 64000 bytes of text. */

	long	flen;
	char*	str;

	mutex_enter_noninline(&srv_monitor_file_mutex);
	rewind(srv_monitor_file);
	srv_printf_innodb_monitor(srv_monitor_file);
	flen = ftell(srv_monitor_file);
	os_file_set_eof(srv_monitor_file);

	if (flen < 0) {
		/* ftell() failed: a negative length must never reach
		my_malloc() or fread(); send an empty status instead */
		flen = 0;
	} else if(flen > 64000 - 1) {
		flen = 64000 - 1;
	}

	/* allocate buffer for the string, and
	read the contents of the temporary file */

	str = my_malloc(flen + 1, MYF(0));

	if (str) {
		rewind(srv_monitor_file);
		/* fread() may return less than asked for; terminate the
		string at what was actually read */
		flen = fread(str, 1, flen, srv_monitor_file);
		str[flen] = 0;
	}

	mutex_exit_noninline(&srv_monitor_file_mutex);

	if (str == NULL) {
		/* Out of memory: there is nothing to send, and a NULL
		string must not be passed on to net_store_data() */

		DBUG_RETURN(-1);
	}

	List<Item> field_list;

	field_list.push_back(new Item_empty_string("Status", flen));

	if (send_fields(thd, field_list, 1)) {

		my_free(str, MYF(0));

		DBUG_RETURN(-1);
	}

	packet->length(0);
	net_store_data(packet, str);
	my_free(str, MYF(0));

	if (my_net_write(&thd->net, (char*)thd->packet.ptr(),
					packet->length())) {

		DBUG_RETURN(-1);
	}

	send_eof(&thd->net);
  	DBUG_RETURN(0);
}

4776 4777 4778 4779 4780
/****************************************************************************
 Handling the shared INNOBASE_SHARE structure that is needed to provide table
 locking.
****************************************************************************/

4781
/* Key extraction function for INNOBASE_SHARE objects: the key of a share is
its table name. Stores the key length in *length and returns a pointer to
the key bytes. The third argument is not used. */

static mysql_byte* innobase_get_key(INNOBASE_SHARE *share,uint *length,
			      my_bool not_used __attribute__((unused)))
{
  mysql_byte *key= (mysql_byte*) share->table_name;

  *length= share->table_name_length;
  return key;
}

/* Looks up the INNOBASE_SHARE of a table in innobase_open_tables and creates
and inserts a new one if the table has none yet. The use count of the share
is incremented on success. All work is done while holding innobase_mutex.

Returns the share, or 0 if a new share could not be allocated or could not
be inserted into the hash. */

static INNOBASE_SHARE *get_share(const char *table_name)
{
  INNOBASE_SHARE *share;
  pthread_mutex_lock(&innobase_mutex);
  uint length=(uint) strlen(table_name);
  if (!(share=(INNOBASE_SHARE*) hash_search(&innobase_open_tables,
					(mysql_byte*) table_name,
					    length)))
  {
    /* The share and the copy of the table name live in one allocation:
    the name is stored right after the struct */
    if (!(share=(INNOBASE_SHARE *) my_malloc(sizeof(*share)+length+1,
				       MYF(MY_WME | MY_ZEROFILL))))
    {
      /* Out of memory: do not fall through to share->use_count++ below,
      which would dereference a NULL pointer */
      pthread_mutex_unlock(&innobase_mutex);
      return 0;
    }
    share->table_name_length=length;
    share->table_name=(char*) (share+1);
    strmov(share->table_name,table_name);
    if (hash_insert(&innobase_open_tables, (mysql_byte*) share))
    {
      pthread_mutex_unlock(&innobase_mutex);
      my_free((gptr) share,0);
      return 0;
    }
    thr_lock_init(&share->lock);
    pthread_mutex_init(&share->mutex,MY_MUTEX_INIT_FAST);
  }
  share->use_count++;
  pthread_mutex_unlock(&innobase_mutex);
  return share;
}

/* Drops one reference to a share. When the use count reaches zero the share
is removed from innobase_open_tables, its lock and mutex are destroyed and
its memory is freed. All work is done while holding innobase_mutex. */

static void free_share(INNOBASE_SHARE *share)
{
  pthread_mutex_lock(&innobase_mutex);

  share->use_count--;

  if (share->use_count == 0)
  {
    /* This was the last user of the share */
    hash_delete(&innobase_open_tables, (mysql_byte*) share);
    thr_lock_delete(&share->lock);
    pthread_mutex_destroy(&share->mutex);
    my_free((gptr) share, MYF(0));
  }

  pthread_mutex_unlock(&innobase_mutex);
}
4830 4831

/*********************************************************************
unknown's avatar
unknown committed
4832
Converts a MySQL table lock stored in the 'lock' field of the handle to
unknown's avatar
unknown committed
4833 4834 4835 4836 4837 4838
a proper type before storing pointer to the lock into an array of pointers.
MySQL also calls this if it wants to reset some table locks to a not-locked
state during the processing of an SQL query. An example is that during a
SELECT the read lock is released early on the 'const' tables where we only
fetch one row. MySQL does not call this when it releases all locks at the
end of an SQL statement. */
4839 4840 4841 4842 4843 4844 4845 4846 4847 4848 4849 4850 4851 4852 4853 4854 4855

THR_LOCK_DATA**
ha_innobase::store_lock(
/*====================*/
						/* out: pointer to the next
						element in the 'to' array */
	THD*			thd,		/* in: user thread handle */
	THR_LOCK_DATA**		to,		/* in: pointer to an array
						of pointers to lock structs;
						pointer to the 'lock' field
						of current handle is stored
						next to this array */
	enum thr_lock_type 	lock_type)	/* in: lock type to store in
						'lock' */
{
	row_prebuilt_t* prebuilt	= (row_prebuilt_t*) innobase_prebuilt;

unknown's avatar
unknown committed
4856 4857 4858 4859
	if (lock_type == TL_READ_WITH_SHARED_LOCKS ||
	    lock_type == TL_READ_NO_INSERT) {
		/* This is a SELECT ... IN SHARE MODE, or
		we are doing a complex SQL statement like
unknown's avatar
unknown committed
4860 4861
		INSERT INTO ... SELECT ... and the logical logging (MySQL
		binlog) requires the use of a locking read */
unknown's avatar
unknown committed
4862

4863
		prebuilt->select_lock_type = LOCK_S;
unknown's avatar
unknown committed
4864 4865 4866 4867 4868 4869 4870
	} else if (lock_type != TL_IGNORE) {

	        /* In ha_berkeley.cc there is a comment that MySQL
	        may in exceptional cases call this with TL_IGNORE also
	        when it is NOT going to release the lock. */

	        /* We set possible LOCK_X value in external_lock, not yet
4871
		here even if this would be SELECT ... FOR UPDATE */
unknown's avatar
unknown committed
4872

4873 4874 4875 4876 4877 4878 4879 4880 4881 4882 4883 4884 4885 4886
		prebuilt->select_lock_type = LOCK_NONE;
	}

	if (lock_type != TL_IGNORE && lock.type == TL_UNLOCK) {

    		/* If we are not doing a LOCK TABLE, then allow multiple
		writers */

    		if ((lock_type >= TL_WRITE_CONCURRENT_INSERT &&
	 	    lock_type <= TL_WRITE) && !thd->in_lock_tables) {

      			lock_type = TL_WRITE_ALLOW_WRITE;
      		}

unknown's avatar
unknown committed
4887 4888 4889 4890 4891 4892 4893 4894 4895 4896
		/* In queries of type INSERT INTO t1 SELECT ... FROM t2 ...
		MySQL would use the lock TL_READ_NO_INSERT on t2, and that
		would conflict with TL_WRITE_ALLOW_WRITE, blocking all inserts
		to t2. Convert the lock to a normal read lock to allow
		concurrent inserts to t2. */
      		
		if (lock_type == TL_READ_NO_INSERT && !thd->in_lock_tables) {
			lock_type = TL_READ;
		}
		
4897 4898 4899 4900
 		lock.type=lock_type;
  	}

  	*to++= &lock;
4901

4902 4903 4904
	return(to);
}

4905
/***********************************************************************
unknown's avatar
unknown committed
4906 4907 4908 4909
This function initializes the auto-inc counter if it has not been
initialized yet. This function does not change the value of the auto-inc
counter if it already has been initialized. In parameter ret returns
the value of the auto-inc counter. */
4910

unknown's avatar
unknown committed
4911 4912 4913 4914 4915 4916
int
ha_innobase::innobase_read_and_init_auto_inc(
/*=========================================*/
				/* out: 0 or error code: deadlock or
				lock wait timeout */
	longlong*	ret)	/* out: auto-inc value */
4917
{
unknown's avatar
unknown committed
4918
  	row_prebuilt_t* prebuilt	= (row_prebuilt_t*) innobase_prebuilt;
unknown's avatar
unknown committed
4919
    	longlong        auto_inc;
unknown's avatar
unknown committed
4920
  	int     	error;
4921

unknown's avatar
unknown committed
4922
  	ut_a(prebuilt);
unknown's avatar
unknown committed
4923 4924
	ut_a(prebuilt->trx ==
		(trx_t*) current_thd->transaction.all.innobase_tid);
unknown's avatar
unknown committed
4925 4926
	ut_a(prebuilt->table);
	
unknown's avatar
unknown committed
4927 4928 4929 4930 4931
	/* In case MySQL calls this in the middle of a SELECT query, release
	possible adaptive hash latch to avoid deadlocks of threads */

	trx_search_latch_release_if_reserved(prebuilt->trx);

unknown's avatar
unknown committed
4932
	auto_inc = dict_table_autoinc_read(prebuilt->table);
unknown's avatar
unknown committed
4933

unknown's avatar
unknown committed
4934 4935 4936 4937 4938 4939
	if (auto_inc != 0) {
		/* Already initialized */
		*ret = auto_inc;
	
		return(0);
	}
4940

unknown's avatar
unknown committed
4941
	error = row_lock_table_autoinc_for_mysql(prebuilt);
unknown's avatar
unknown committed
4942

unknown's avatar
unknown committed
4943 4944
	if (error != DB_SUCCESS) {
		error = convert_error_code_to_mysql(error, user_thd);
4945

unknown's avatar
unknown committed
4946 4947
		goto func_exit;
	}	
unknown's avatar
unknown committed
4948

unknown's avatar
unknown committed
4949 4950
	/* Check again if someone has initialized the counter meanwhile */
	auto_inc = dict_table_autoinc_read(prebuilt->table);
unknown's avatar
unknown committed
4951

unknown's avatar
unknown committed
4952 4953 4954 4955
	if (auto_inc != 0) {
		*ret = auto_inc;
	
		return(0);
unknown's avatar
unknown committed
4956
	}
4957

unknown's avatar
unknown committed
4958 4959 4960 4961 4962 4963 4964 4965 4966
  	(void) extra(HA_EXTRA_KEYREAD);
  	index_init(table->next_number_index);

	/* We use an exclusive lock when we read the max key value from the
  	auto-increment column index. This is because then build_template will
  	advise InnoDB to fetch all columns. In SHOW TABLE STATUS the query
  	id of the auto-increment column is not changed, and previously InnoDB
  	did not fetch it, causing SHOW TABLE STATUS to show wrong values
  	for the autoinc column. */
4967

unknown's avatar
unknown committed
4968
  	prebuilt->select_lock_type = LOCK_X;
4969

unknown's avatar
unknown committed
4970 4971 4972 4973
  	/* Play safe and also give in another way the hint to fetch
  	all columns in the key: */
  	
	prebuilt->hint_no_need_to_fetch_extra_cols = FALSE;
4974

unknown's avatar
unknown committed
4975
	prebuilt->trx->mysql_n_tables_locked += 1;
4976
  
unknown's avatar
unknown committed
4977
	error = index_last(table->record[1]);
4978

unknown's avatar
unknown committed
4979
  	if (error) {
unknown's avatar
unknown committed
4980 4981 4982 4983 4984 4985 4986 4987 4988 4989 4990
		if (error == HA_ERR_END_OF_FILE) {
			/* The table was empty, initialize to 1 */
			auto_inc = 1;

			error = 0;
		} else {
			/* Deadlock or a lock wait timeout */
  			auto_inc = -1;

  			goto func_exit;
  		}
unknown's avatar
unknown committed
4991
  	} else {
unknown's avatar
unknown committed
4992 4993
		/* Initialize to max(col) + 1 */
    		auto_inc = (longlong) table->next_number_field->
unknown's avatar
unknown committed
4994 4995
                        	val_int_offset(table->rec_buff_length) + 1;
  	}
4996

unknown's avatar
unknown committed
4997 4998 4999
	dict_table_autoinc_initialize(prebuilt->table, auto_inc);

func_exit:
unknown's avatar
unknown committed
5000
  	(void) extra(HA_EXTRA_NO_KEYREAD);
5001

unknown's avatar
unknown committed
5002 5003 5004 5005 5006 5007 5008 5009 5010 5011 5012 5013 5014 5015 5016 5017 5018 5019 5020 5021 5022 5023 5024 5025 5026 5027 5028 5029
	index_end();

	*ret = auto_inc;

  	return(error);
}

/***********************************************************************
Returns the value of the auto-inc counter of the table. The counter is
initialized first if it has not been initialized yet; an already
initialized counter is not changed. */

longlong
ha_innobase::get_auto_increment()
/*=============================*/
                         /* out: auto-increment column value, -1 if error
                         (deadlock or lock wait timeout) */
{
	longlong	value;

	if (innobase_read_and_init_auto_inc(&value) != 0) {
		/* The counter could not be read; the value is not usable */

		return(-1);
	}

	return(value);
}

5034
#endif /* HAVE_INNOBASE_DB */