ha_innodb.cc 154 KB
Newer Older
unknown's avatar
unknown committed
1
/* Copyright (C) 2000 MySQL AB & Innobase Oy

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */

unknown's avatar
unknown committed
17
/* This file defines the InnoDB handler: the interface between MySQL and
InnoDB
NOTE: You can only use noninlined InnoDB functions in this file, because we
have disabled the InnoDB inlining in this file. */
21

unknown's avatar
unknown committed
22
/* TODO list for the InnoDB handler in 4.1:
  - Remove the flag innodb_active_trans from thd and replace it with a
    function call innodb_active_trans(thd), which looks at the InnoDB
    trx struct state field
  - Find out what kind of problems the OS X case-insensitivity causes to
    table and database names; should we 'normalize' the names like we do
    in Windows?
*/
unknown's avatar
unknown committed
30

31 32 33 34 35
#ifdef __GNUC__
#pragma implementation				// gcc: Class implementation
#endif

#include "mysql_priv.h"
#include "slave.h"

#ifdef HAVE_INNOBASE_DB
#include <m_ctype.h>
#include <hash.h>
#include <myisampack.h>
#include <mysys_err.h>
#include <my_sys.h>

/* A ulong with only its most significant bit set */
#define MAX_ULONG_BIT ((ulong) 1 << (sizeof(ulong)*8-1))

#include "ha_innodb.h"

pthread_mutex_t innobase_mutex;
bool innodb_inited= 0;

/* Store MySQL definition of 'byte': in Linux it is char while InnoDB
uses unsigned char; the header univ.i which we include next defines
'byte' as a macro which expands to 'unsigned char' */

typedef byte	mysql_byte;

#define INSIDE_HA_INNOBASE_CC

/* Include necessary InnoDB headers */
extern "C" {
#include "../innobase/include/univ.i"
#include "../innobase/include/os0file.h"
#include "../innobase/include/os0thread.h"
#include "../innobase/include/srv0start.h"
#include "../innobase/include/srv0srv.h"
#include "../innobase/include/trx0roll.h"
#include "../innobase/include/trx0trx.h"
#include "../innobase/include/trx0sys.h"
#include "../innobase/include/mtr0mtr.h"
#include "../innobase/include/row0ins.h"
#include "../innobase/include/row0mysql.h"
#include "../innobase/include/row0sel.h"
#include "../innobase/include/row0upd.h"
#include "../innobase/include/log0log.h"
#include "../innobase/include/lock0lock.h"
#include "../innobase/include/dict0crea.h"
#include "../innobase/include/btr0cur.h"
#include "../innobase/include/btr0btr.h"
#include "../innobase/include/fsp0fsp.h"
#include "../innobase/include/sync0sync.h"
#include "../innobase/include/fil0fil.h"
}

#define HA_INNOBASE_ROWS_IN_TABLE 10000 /* to get optimization right */
#define HA_INNOBASE_RANGE_COUNT	  100

88 89
uint 	innobase_init_flags 	= 0;
ulong 	innobase_cache_size 	= 0;
90

unknown's avatar
unknown committed
91 92 93
/* The default values for the following, type long, start-up parameters
are declared in mysqld.cc: */

94
long innobase_mirrored_log_groups, innobase_log_files_in_group,
95
     innobase_log_file_size, innobase_log_buffer_size,
unknown's avatar
unknown committed
96 97
     innobase_buffer_pool_awe_mem_mb,
     innobase_buffer_pool_size, innobase_additional_mem_pool_size,
unknown's avatar
Merge  
unknown committed
98
     innobase_file_io_threads, innobase_lock_wait_timeout,
unknown's avatar
unknown committed
99
     innobase_thread_concurrency, innobase_force_recovery,
100
     innobase_open_files;
unknown's avatar
unknown committed
101

unknown's avatar
unknown committed
102 103
/* The default values for the following char* start-up parameters
are determined in innobase_init below: */
unknown's avatar
unknown committed
104
  
unknown's avatar
unknown committed
105
char*	innobase_data_home_dir			= NULL;
unknown's avatar
unknown committed
106
char*	innobase_data_file_path 		= NULL;
unknown's avatar
unknown committed
107
char*	innobase_log_group_home_dir		= NULL;
unknown's avatar
unknown committed
108
char*	innobase_log_arch_dir			= NULL;/* unused */
unknown's avatar
unknown committed
109 110
/* The following has a misleading name: starting from 4.0.5, this also
affects Windows: */
unknown's avatar
unknown committed
111 112 113 114 115
char*	innobase_unix_file_flush_method		= NULL;

/* Below we have boolean-valued start-up parameters, and their default
values */

unknown's avatar
unknown committed
116
uint	innobase_flush_log_at_trx_commit	= 1;
unknown's avatar
unknown committed
117
my_bool innobase_log_archive			= FALSE;/* unused */
unknown's avatar
unknown committed
118 119
my_bool	innobase_use_native_aio			= FALSE;
my_bool	innobase_fast_shutdown			= TRUE;
120 121 122
my_bool innobase_very_fast_shutdown		= FALSE; /* this can be set to
							 1 just prior calling
							 innobase_end() */
unknown's avatar
unknown committed
123
my_bool	innobase_file_per_table			= FALSE;
124
my_bool innobase_locks_unsafe_for_binlog        = FALSE;
125
my_bool innobase_create_status_file		= FALSE;
126

unknown's avatar
unknown committed
127
static char *internal_innobase_data_file_path	= NULL;
128

129
/* The following counter is used to convey information to InnoDB
130 131 132 133 134
about server activity: in selects it is not sensible to call
srv_active_wake_master_thread after each fetch or search, we only do
it every INNOBASE_WAKE_INTERVAL'th step. */

#define INNOBASE_WAKE_INTERVAL	32
135
ulong	innobase_active_counter	= 0;
136 137 138

char*	innobase_home 	= NULL;

unknown's avatar
unknown committed
139 140
char    innodb_dummy_stmt_trx_handle = 'D';

unknown's avatar
unknown committed
141
static HASH 	innobase_open_tables;
142

143 144 145 146
#ifdef __NETWARE__  	/* some special cleanup for NetWare */
bool nw_panic = FALSE;
#endif

147
static mysql_byte* innobase_get_key(INNOBASE_SHARE *share,uint *length,
148 149 150 151 152 153
			      my_bool not_used __attribute__((unused)));
static INNOBASE_SHARE *get_share(const char *table_name);
static void free_share(INNOBASE_SHARE *share);

/* General functions */

unknown's avatar
unknown committed
154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187
/**********************************************************************
Calls srv_conc_enter_innodb() for the transaction, except when
srv_thread_concurrency is 500 or greater, in which case nothing is done.
Checking the setting here, in an inline function, saves some CPU. */
inline
void
innodb_srv_conc_enter_innodb(
/*=========================*/
	trx_t*	trx)	/* in: transaction handle */
{
	if (srv_thread_concurrency < 500) {

		srv_conc_enter_innodb(trx);
	}
}

/**********************************************************************
Calls srv_conc_exit_innodb() for the transaction, except when
srv_thread_concurrency is 500 or greater, in which case nothing is done.
Checking the setting here, in an inline function, saves some CPU. */
inline
void
innodb_srv_conc_exit_innodb(
/*========================*/
	trx_t*	trx)	/* in: transaction handle */
{
	if (srv_thread_concurrency < 500) {

		srv_conc_exit_innodb(trx);
	}
}

unknown's avatar
unknown committed
188
/**********************************************************************
unknown's avatar
unknown committed
189
Releases possible search latch and InnoDB thread FIFO ticket. These should
unknown's avatar
unknown committed
190 191 192 193
be released at each SQL statement end, and also when mysqld passes the
control to the client. It does no harm to release these also in the middle
of an SQL statement. */
inline
unknown's avatar
unknown committed
194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209
void
innobase_release_stat_resources(
/*============================*/
	trx_t*	trx)	/* in: transaction object */
{
	if (trx->has_search_latch) {
		trx_search_latch_release_if_reserved(trx);
	}

	if (trx->declared_to_be_inside_innodb) {
		/* Release our possible ticket in the FIFO */

		srv_conc_force_exit_innodb(trx);
	}
}

210 211 212 213 214 215 216 217 218 219 220 221 222
/************************************************************************
Call this function when mysqld passes control to the client. That is to
avoid deadlocks on the adaptive hash S-latch possibly held by thd. For more
documentation, see handler.cc. */

void
innobase_release_temporary_latches(
/*===============================*/
        void*   innobase_tid)
{
        innobase_release_stat_resources((trx_t*)innobase_tid);
}

223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239
/************************************************************************
Bumps innobase_active_counter by one and, whenever the new value is a
multiple of INNOBASE_WAKE_INTERVAL, calls srv_active_wake_master_thread.
Use this function when a single database operation may introduce a small
need for server utility activity, like checkpointing. */
inline
void
innobase_active_small(void)
/*=======================*/
{
	if ((++innobase_active_counter % INNOBASE_WAKE_INTERVAL) == 0) {
		srv_active_wake_master_thread();
	}
}

240
/************************************************************************
unknown's avatar
unknown committed
241 242 243
Converts an InnoDB error code to a MySQL error code and also tells to MySQL
about a possible transaction rollback inside InnoDB caused by a lock wait
timeout or a deadlock. */
244 245 246 247 248
static
int
convert_error_code_to_mysql(
/*========================*/
			/* out: MySQL error code */
unknown's avatar
unknown committed
249 250
	int	error,	/* in: InnoDB error code */
	THD*	thd)	/* in: user thread handle or NULL */
251 252 253 254 255 256 257 258 259 260 261 262 263 264 265
{
	if (error == DB_SUCCESS) {

		return(0);

  	} else if (error == (int) DB_DUPLICATE_KEY) {

    		return(HA_ERR_FOUND_DUPP_KEY);

 	} else if (error == (int) DB_RECORD_NOT_FOUND) {

    		return(HA_ERR_NO_ACTIVE_RECORD);

 	} else if (error == (int) DB_ERROR) {

unknown's avatar
unknown committed
266
    		return(-1); /* unspecified error */
267 268

 	} else if (error == (int) DB_DEADLOCK) {
unknown's avatar
unknown committed
269
 		/* Since we rolled back the whole transaction, we must
unknown's avatar
unknown committed
270 271 272 273 274 275
 		tell it also to MySQL so that MySQL knows to empty the
 		cached binlog for this transaction */

 		if (thd) {
 			ha_rollback(thd);
 		}
276

277 278 279 280
    		return(HA_ERR_LOCK_DEADLOCK);

 	} else if (error == (int) DB_LOCK_WAIT_TIMEOUT) {

unknown's avatar
unknown committed
281
 		/* Since we rolled back the whole transaction, we must
unknown's avatar
unknown committed
282 283 284 285 286 287 288
 		tell it also to MySQL so that MySQL knows to empty the
 		cached binlog for this transaction */

 		if (thd) {
 			ha_rollback(thd);
 		}

unknown's avatar
Merge  
unknown committed
289
    		return(HA_ERR_LOCK_WAIT_TIMEOUT);
290 291 292

 	} else if (error == (int) DB_NO_REFERENCED_ROW) {

unknown's avatar
Merge  
unknown committed
293
    		return(HA_ERR_NO_REFERENCED_ROW);
294 295 296

 	} else if (error == (int) DB_ROW_IS_REFERENCED) {

unknown's avatar
Merge  
unknown committed
297
    		return(HA_ERR_ROW_IS_REFERENCED);
298

299
        } else if (error == (int) DB_CANNOT_ADD_CONSTRAINT) {
300

unknown's avatar
Merge  
unknown committed
301
    		return(HA_ERR_CANNOT_ADD_FOREIGN);
302

unknown's avatar
unknown committed
303 304
        } else if (error == (int) DB_CANNOT_DROP_CONSTRAINT) {

unknown's avatar
unknown committed
305 306 307
    		return(HA_ERR_CANNOT_ADD_FOREIGN); /* TODO: This is a bit
						misleading, a new MySQL error
						code should be introduced */
308 309
        } else if (error == (int) DB_COL_APPEARS_TWICE_IN_INDEX) {

unknown's avatar
unknown committed
310
    		return(HA_ERR_CRASHED);
311

312 313 314 315 316 317 318 319 320 321 322 323 324 325 326
 	} else if (error == (int) DB_OUT_OF_FILE_SPACE) {

    		return(HA_ERR_RECORD_FILE_FULL);

 	} else if (error == (int) DB_TABLE_IS_BEING_USED) {

    		return(HA_ERR_WRONG_COMMAND);

 	} else if (error == (int) DB_TABLE_NOT_FOUND) {

    		return(HA_ERR_KEY_NOT_FOUND);

  	} else if (error == (int) DB_TOO_BIG_RECORD) {

    		return(HA_ERR_TO_BIG_ROW);
unknown's avatar
unknown committed
327 328 329 330

  	} else if (error == (int) DB_CORRUPTION) {

    		return(HA_ERR_CRASHED);
unknown's avatar
unknown committed
331 332 333
  	} else if (error == (int) DB_NO_SAVEPOINT) {

    		return(HA_ERR_NO_SAVEPOINT);
334
    	} else {
unknown's avatar
unknown committed
335
    		return(-1);			// Unknown error
336 337 338
    	}
}

339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367
/*****************************************************************
If you want to print a thd that is not associated with the current thread,
you must call this function before reserving the InnoDB kernel_mutex, to
protect MySQL from setting thd->query NULL. If you print a thd of the current
thread, we know that MySQL cannot modify thd->query, and it is not necessary
to call this. Call innobase_mysql_end_print_arbitrary_thd() after you release
the kernel_mutex.
NOTE that /mysql/innobase/lock/lock0lock.c must contain the prototype for this
function! */
extern "C"
void
innobase_mysql_prepare_print_arbitrary_thd(void)
/*============================================*/
{
	/* Take LOCK_thread_count; it is released again by
	innobase_mysql_end_print_arbitrary_thd() */
	VOID(pthread_mutex_lock(&LOCK_thread_count));
}

/*****************************************************************
Releases the mutex reserved by innobase_mysql_prepare_print_arbitrary_thd().
NOTE that /mysql/innobase/lock/lock0lock.c must contain the prototype for this
function! */
extern "C"
void
innobase_mysql_end_print_arbitrary_thd(void)
/*========================================*/
{
	/* Give LOCK_thread_count back */
	VOID(pthread_mutex_unlock(&LOCK_thread_count));
}

368 369
/*****************************************************************
Prints info of a THD object (== user session thread) to the
unknown's avatar
unknown committed
370
standard output. NOTE that /mysql/innobase/trx/trx0trx.c must contain
371
the prototype for this function! */
372
extern "C"
373 374 375
void
innobase_mysql_print_thd(
/*=====================*/
376
	FILE*   f,	/* in: output stream */
377
        void*   input_thd)/* in: pointer to a MySQL THD object */
378
{
379 380
	const THD*	thd;
	const char*	s;
unknown's avatar
unknown committed
381
	char		buf[301];
382

383
        thd = (const THD*) input_thd;
384

385 386 387 388 389 390
  	fprintf(f, "MySQL thread id %lu, query id %lu",
		thd->thread_id, thd->query_id);
	if (thd->host) {
		putc(' ', f);
		fputs(thd->host, f);
	}
391

392 393 394 395
	if (thd->ip) {
		putc(' ', f);
		fputs(thd->ip, f);
	}
396

397
  	if (thd->user) {
398 399
		putc(' ', f);
		fputs(thd->user, f);
400 401
  	}

402
	if ((s = thd->proc_info)) {
403
		putc(' ', f);
404
		fputs(s, f);
405
	}
406

407
	if ((s = thd->query)) {
unknown's avatar
unknown committed
408
		/* determine the length of the query string */
unknown's avatar
unknown committed
409 410 411 412 413
		uint32 i, len;
		
		len = thd->query_length;

		if (len > 300) {
414
			len = 300;	/* ADDITIONAL SAFETY: print at most
unknown's avatar
unknown committed
415
					300 chars to reduce the probability of
416
					a seg fault if there is a race in
unknown's avatar
unknown committed
417 418 419
					thd->query_length in MySQL; after
					May 14, 2004 probably no race any more,
					but better be safe */
unknown's avatar
unknown committed
420
		}
unknown's avatar
unknown committed
421

422 423 424
                /* Use strmake to reduce the timeframe
                   for a race, compared to fwrite() */
		i= (uint) (strmake(buf, s, len) - buf);
unknown's avatar
unknown committed
425
		putc('\n', f);
unknown's avatar
unknown committed
426
		fwrite(buf, 1, i, f);
427
	}
428

429
	putc('\n', f);
430 431
}

432 433 434 435 436 437 438 439 440
/*************************************************************************
Creates a temporary file. */
extern "C"
int
innobase_mysql_tmpfile(void)
/*========================*/
			/* out: temporary file descriptor, or < 0 on error */
{
	char	filename[FN_REFLEN];
unknown's avatar
unknown committed
441
	int	fd2 = -1;
442 443 444 445 446 447 448 449
	File	fd = create_temp_file(filename, NullS, "ib",
#ifdef __WIN__
				O_BINARY | O_TRUNC | O_SEQUENTIAL |
				O_TEMPORARY | O_SHORT_LIVED |
#endif /* __WIN__ */
				O_CREAT | O_EXCL | O_RDWR,
				MYF(MY_WME));
	if (fd >= 0) {
unknown's avatar
unknown committed
450 451 452 453
#ifndef __WIN__
		/* On Windows, open files cannot be removed, but files can be
		created with the O_TEMPORARY flag to the same effect
		("delete on close"). */
454 455
		unlink(filename);
#endif /* !__WIN__ */
unknown's avatar
unknown committed
456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473
		/* Copy the file descriptor, so that the additional resources
		allocated by create_temp_file() can be freed by invoking
		my_close().

		Because the file descriptor returned by this function
		will be passed to fdopen(), it will be closed by invoking
		fclose(), which in turn will invoke close() instead of
		my_close(). */
		fd2 = dup(fd);
		if (fd2 < 0) {
			DBUG_PRINT("error",("Got error %d on dup",fd2));
			my_errno=errno;
			my_error(EE_OUT_OF_FILERESOURCES,
				MYF(ME_BELL+ME_WAITTANG), filename, my_errno);
		}
		my_close(fd, MYF(MY_WME));
	}
	return(fd2);
474 475
}

476
/*************************************************************************
477 478
Gets the InnoDB transaction handle for a MySQL handler object, creates
an InnoDB transaction struct if the corresponding MySQL thread struct still
479
lacks one. */
480
static
481 482 483
trx_t*
check_trx_exists(
/*=============*/
484
			/* out: InnoDB transaction handle */
485 486 487 488
	THD*	thd)	/* in: user thread handle */
{
	trx_t*	trx;

unknown's avatar
unknown committed
489
	ut_ad(thd == current_thd);
unknown's avatar
unknown committed
490

unknown's avatar
unknown committed
491
	trx = (trx_t*) thd->transaction.all.innobase_tid;
492 493

	if (trx == NULL) {
unknown's avatar
unknown committed
494
	        DBUG_ASSERT(thd != NULL);
495
		trx = trx_allocate_for_mysql();
496

497
		trx->mysql_thd = thd;
unknown's avatar
unknown committed
498 499
		trx->mysql_query_str = &((*thd).query);
		
unknown's avatar
unknown committed
500
		thd->transaction.all.innobase_tid = trx;
501

unknown's avatar
unknown committed
502
		/* The execution of a single SQL statement is denoted by
503
		a 'transaction' handle which is a dummy pointer: InnoDB
unknown's avatar
unknown committed
504 505
		remembers internally where the latest SQL statement
		started, and if error handling requires rolling back the
506
		latest statement, InnoDB does a rollback to a savepoint. */
unknown's avatar
unknown committed
507

unknown's avatar
unknown committed
508 509
		thd->transaction.stmt.innobase_tid =
		                  (void*)&innodb_dummy_stmt_trx_handle;
unknown's avatar
unknown committed
510
	} else {
unknown's avatar
unknown committed
511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527
		if (trx->magic_n != TRX_MAGIC_N) {
			mem_analyze_corruption((byte*)trx);

			ut_a(0);
		}
	}

	if (thd->options & OPTION_NO_FOREIGN_KEY_CHECKS) {
		trx->check_foreigns = FALSE;
	} else {
		trx->check_foreigns = TRUE;
	}

	if (thd->options & OPTION_RELAXED_UNIQUE_CHECKS) {
		trx->check_unique_secondary = FALSE;
	} else {
		trx->check_unique_secondary = TRUE;
528 529 530 531 532 533
	}

	return(trx);
}

/*************************************************************************
534
Updates the user_thd field in a handle and also allocates a new InnoDB
535 536
transaction handle if needed, and updates the transaction fields in the
prebuilt struct. */
537
inline
538 539 540 541 542 543
int
ha_innobase::update_thd(
/*====================*/
			/* out: 0 or error code */
	THD*	thd)	/* in: thd to use the handle */
{
544 545
	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;
	trx_t*		trx;
unknown's avatar
unknown committed
546
	
547 548
	trx = check_trx_exists(thd);

549
	if (prebuilt->trx != trx) {
550

551
		row_update_prebuilt_trx(prebuilt, trx);
552 553 554
	}

	user_thd = thd;
555

556 557 558
	return(0);
}

unknown's avatar
unknown committed
559 560 561 562 563 564 565 566 567 568

/*   BACKGROUND INFO: HOW THE MYSQL QUERY CACHE WORKS WITH INNODB
     ------------------------------------------------------------

1) The use of the query cache for TBL is disabled when there is an
uncommitted change to TBL.

2) When a change to TBL commits, InnoDB stores the current value of
its global trx id counter, let us denote it by INV_TRX_ID, to the table object
in the InnoDB data dictionary, and only allows such transactions whose
id <= INV_TRX_ID to use the query cache.

3) When InnoDB does an INSERT/DELETE/UPDATE to a table TBL, or an implicit
modification because an ON DELETE CASCADE, we invalidate the MySQL query cache
of TBL immediately.

How this is implemented inside InnoDB:

1) Since every modification always sets an IX type table lock on the InnoDB
table, it is easy to check if there can be uncommitted modifications for a
table: just check if there are locks in the lock list of the table.

2) When a transaction inside InnoDB commits, it reads the global trx id
counter and stores the value INV_TRX_ID to the tables on which it had a lock.

3) If there is an implicit table change from ON DELETE CASCADE or SET NULL,
InnoDB calls an invalidate method for the MySQL query cache for that table.

How this is implemented inside sql_cache.cc:

1) The query cache for an InnoDB table TBL is invalidated immediately at an
INSERT/UPDATE/DELETE, just like in the case of MyISAM. No need to delay
invalidation to the transaction commit.

2) To store or retrieve a value from the query cache of an InnoDB table TBL,
any query must first ask InnoDB's permission. We must pass the thd as a
parameter because InnoDB will look at the trx id, if any, associated with
that thd.

3) Use of the query cache for InnoDB tables is now allowed also when
AUTOCOMMIT==0 or we are inside BEGIN ... COMMIT. Thus transactions no longer
put restrictions on the use of the query cache.
*/

/**********************************************************************
The MySQL query cache uses this to check from InnoDB if the query cache at
the moment is allowed to operate on an InnoDB table. The SQL query must
be a non-locking SELECT.

The query cache is allowed to operate on certain query only if this function
returns TRUE for all tables in the query.

If thd is not in the autocommit state, this function also starts a new
transaction for thd if there is no active trx yet, and assigns a consistent
read view to it if there is no read view yet. */

unknown's avatar
unknown committed
615
my_bool
unknown's avatar
unknown committed
616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638
innobase_query_caching_of_table_permitted(
/*======================================*/
				/* out: TRUE if permitted, FALSE if not;
				note that the value FALSE does not mean
				we should invalidate the query cache:
				invalidation is called explicitly */
	THD*	thd,		/* in: thd of the user who is trying to
				store a result to the query cache or
				retrieve it */
	char*	full_name,	/* in: concatenation of database name,
				the null character '\0', and the table
				name */
	uint	full_name_len)	/* in: length of the full name, i.e.
				len(dbname) + len(tablename) + 1 */
{
	ibool	is_autocommit;
	trx_t*	trx;
	char	norm_name[1000];

	ut_a(full_name_len < 999);

	if (thd->variables.tx_isolation == ISO_SERIALIZABLE) {
		/* In the SERIALIZABLE mode we add LOCK IN SHARE MODE to every
unknown's avatar
unknown committed
639
		plain SELECT if AUTOCOMMIT is not on. */
unknown's avatar
unknown committed
640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659
	
		return((my_bool)FALSE);
	}

	trx = (trx_t*) thd->transaction.all.innobase_tid;

	if (trx == NULL) {
		trx = check_trx_exists(thd);
	}

	innobase_release_stat_resources(trx);

	if (!(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) {

		is_autocommit = TRUE;
	} else {
		is_autocommit = FALSE;

	}

unknown's avatar
unknown committed
660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677
	if (is_autocommit && trx->n_mysql_tables_in_use == 0) {
		/* We are going to retrieve the query result from the query
		cache. This cannot be a store operation to the query cache
		because then MySQL would have locks on tables already.

		TODO: if the user has used LOCK TABLES to lock the table,
		then we open a transaction in the call of row_.. below.
		That trx can stay open until UNLOCK TABLES. The same problem
		exists even if we do not use the query cache. MySQL should be
		modified so that it ALWAYS calls some cleanup function when
		the processing of a query ends!

		We can imagine we instantaneously serialize this consistent
		read trx to the current trx id counter. If trx2 would have
		changed the tables of a query result stored in the cache, and
		trx2 would have already committed, making the result obsolete,
		then trx2 would have already invalidated the cache. Thus we
		can trust the result in the cache is ok for this query. */
unknown's avatar
unknown committed
678 679 680 681 682 683 684 685 686 687 688 689 690 691

		return((my_bool)TRUE);
	}
	
	/* Normalize the table name to InnoDB format */

	memcpy(norm_name, full_name, full_name_len);

	norm_name[strlen(norm_name)] = '/'; /* InnoDB uses '/' as the
					    separator between db and table */
	norm_name[full_name_len] = '\0';
#ifdef __WIN__
	/* Put to lower case */

unknown's avatar
unknown committed
692
	char*	ptr = norm_name;
unknown's avatar
unknown committed
693 694 695 696 697 698

	while (*ptr != '\0') {
	        *ptr = tolower(*ptr);
	        ptr++;
	}
#endif
unknown's avatar
unknown committed
699 700 701 702 703
	/* The call of row_search_.. will start a new transaction if it is
	not yet started */

	thd->transaction.all.innodb_active_trans = 1;

unknown's avatar
unknown committed
704 705
	if (row_search_check_if_query_cache_permitted(trx, norm_name)) {

unknown's avatar
unknown committed
706
		/* printf("Query cache for %s permitted\n", norm_name); */
unknown's avatar
unknown committed
707 708 709 710

		return((my_bool)TRUE);
	}

unknown's avatar
unknown committed
711
	/* printf("Query cache for %s NOT permitted\n", norm_name); */
unknown's avatar
unknown committed
712 713 714 715 716 717 718 719

	return((my_bool)FALSE);
}

/*********************************************************************
Invalidates the MySQL query cache for the table.
NOTE that the exact prototype of this function has to be in
/innobase/row/row0ins.c! */
720
extern "C"
unknown's avatar
unknown committed
721 722 723 724 725
void
innobase_invalidate_query_cache(
/*============================*/
	trx_t*	trx,		/* in: transaction which modifies the table */
	char*	full_name,	/* in: concatenation of database name, null
unknown's avatar
unknown committed
726 727 728 729 730
				char '\0', table name, null char'\0';
				NOTE that in Windows this is always
				in LOWER CASE! */
	ulint	full_name_len)	/* in: full name length where also the null
				chars count */
unknown's avatar
unknown committed
731 732
{
	/* Argument TRUE below means we are using transactions */
unknown's avatar
unknown committed
733
#ifdef HAVE_QUERY_CACHE
unknown's avatar
unknown committed
734 735 736 737
	query_cache.invalidate((THD*)(trx->mysql_thd),
					(const char*)full_name,
					(uint32)full_name_len,
					TRUE);
unknown's avatar
unknown committed
738
#endif
unknown's avatar
unknown committed
739
}
740 741

/*********************************************************************
742 743
Get the quote character to be used in SQL identifiers.
This definition must match the one in innobase/ut/ut0ut.c! */
744
extern "C"
745 746 747
int
mysql_get_identifier_quote_char(
/*============================*/
748
				/* out: quote character to be
749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768
				used in SQL identifiers; EOF if none */
	trx_t*		trx,	/* in: transaction */
	const char*	name,	/* in: name to print */
	ulint		namelen)/* in: length of name */
{
	if (!trx || !trx->mysql_thd) {
		return(EOF);
	}
	return(get_quote_char_for_identifier((THD*) trx->mysql_thd,
						name, namelen));
}

/**************************************************************************
Obtain a pointer to the MySQL THD object, as in current_thd().  This
definition must match the one in sql/ha_innodb.cc! */
extern "C"
void*
innobase_current_thd(void)
/*======================*/
			/* out: MySQL THD object */
769
{
770
	return(current_thd);
unknown's avatar
unknown committed
771 772
}

773 774 775 776
/*********************************************************************
Call this when you have opened a new table handle in HANDLER, before you
call index_read_idx() etc. Actually, we can let the cursor stay open even
over a transaction commit! Then you should call this before every operation,
777
fetch next etc. This function inits the necessary things even after a
778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797
transaction commit. */

void
ha_innobase::init_table_handle_for_HANDLER(void)
/*============================================*/
{
        row_prebuilt_t* prebuilt;

        /* If current thd does not yet have a trx struct, create one.
        If the current handle does not yet have a prebuilt struct, create
        one. Update the trx pointers in the prebuilt struct. Normally
        this operation is done in external_lock. */

        update_thd(current_thd);

        /* Initialize the prebuilt struct much like it would be inited in
        external_lock */

        prebuilt = (row_prebuilt_t*)innobase_prebuilt;

unknown's avatar
unknown committed
798 799
	innobase_release_stat_resources(prebuilt->trx);

800 801 802 803 804 805 806 807
        /* If the transaction is not started yet, start it */

        trx_start_if_not_started_noninline(prebuilt->trx);

        /* Assign a read view if the transaction does not have it yet */

        trx_assign_read_view(prebuilt->trx);

808 809 810 811
	/* Set the MySQL flag to mark that there is an active transaction */

	current_thd->transaction.all.innodb_active_trans = 1;

812 813 814 815 816 817 818 819 820
        /* We did the necessary inits in this function, no need to repeat them
        in row_search_for_mysql */

        prebuilt->sql_stat_start = FALSE;

        /* We let HANDLER always to do the reads as consistent reads, even
        if the trx isolation level would have been specified as SERIALIZABLE */

        prebuilt->select_lock_type = LOCK_NONE;
unknown's avatar
unknown committed
821
        prebuilt->stored_select_lock_type = LOCK_NONE;
822 823 824

        /* Always fetch all columns in the index record */

unknown's avatar
unknown committed
825
        prebuilt->hint_need_to_fetch_extra_cols = ROW_RETRIEVE_ALL_COLS;
826 827 828 829 830

        /* We want always to fetch all columns in the whole row? Or do
	we???? */

        prebuilt->read_just_key = FALSE;
831 832

	prebuilt->used_in_HANDLER = TRUE;
833 834
}

835
/*************************************************************************
836
Opens an InnoDB database. */
837

838
bool
839 840
innobase_init(void)
/*===============*/
841
			/* out: TRUE if error */
842
{
unknown's avatar
unknown committed
843
	static char	current_dir[3];		/* Set if using current lib */
844 845
	int		err;
	bool		ret;
846
	char 	        *default_path;
unknown's avatar
merge  
unknown committed
847

848 849
  	DBUG_ENTER("innobase_init");

unknown's avatar
unknown committed
850
  	os_innodb_umask = (ulint)my_umask;
unknown's avatar
unknown committed
851

unknown's avatar
unknown committed
852 853 854 855 856 857
	/* First calculate the default path for innodb_data_home_dir etc.,
	in case the user has not given any value.

	Note that when using the embedded server, the datadirectory is not
	necessarily the current directory of this program. */

858
	if (mysqld_embedded) {
unknown's avatar
unknown committed
859
		default_path = mysql_real_data_home;
unknown's avatar
unknown committed
860
		fil_path_to_mysql_datadir = mysql_real_data_home;
unknown's avatar
unknown committed
861 862 863 864 865 866
	} else {
	  	/* It's better to use current lib, to keep paths short */
	  	current_dir[0] = FN_CURLIB;
	  	current_dir[1] = FN_LIBCHAR;
	  	current_dir[2] = 0;
	  	default_path = current_dir;
unknown's avatar
unknown committed
867 868
	}

unknown's avatar
unknown committed
869 870
	ut_a(default_path);

unknown's avatar
unknown committed
871 872 873 874 875 876
	if (specialflag & SPECIAL_NO_PRIOR) {
	        srv_set_thread_priorities = FALSE;
	} else {
	        srv_set_thread_priorities = TRUE;
	        srv_query_thread_priority = QUERY_PRIOR;
	}
unknown's avatar
unknown committed
877

unknown's avatar
unknown committed
878 879
	/* Set InnoDB initialization parameters according to the values
	read from MySQL .cnf file */
unknown's avatar
unknown committed
880

unknown's avatar
unknown committed
881
	/*--------------- Data files -------------------------*/
882

unknown's avatar
unknown committed
883
	/* The default dir for data files is the datadir of MySQL */
unknown's avatar
unknown committed
884 885

	srv_data_home = (innobase_data_home_dir ? innobase_data_home_dir :
unknown's avatar
unknown committed
886
			 default_path);
unknown's avatar
unknown committed
887

unknown's avatar
unknown committed
888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903
	/* Set default InnoDB data file size to 10 MB and let it be
  	auto-extending. Thus users can use InnoDB in >= 4.0 without having
	to specify any startup options. */

	if (!innobase_data_file_path) {
  		innobase_data_file_path = (char*) "ibdata1:10M:autoextend";
	}

	/* Since InnoDB edits the argument in the next call, we make another
	copy of it: */

	internal_innobase_data_file_path = my_strdup(innobase_data_file_path,
						   MYF(MY_WME));

	ret = (bool) srv_parse_data_file_paths_and_sizes(
				internal_innobase_data_file_path,
unknown's avatar
unknown committed
904 905 906 907 908 909
				&srv_data_file_names,
				&srv_data_file_sizes,
				&srv_data_file_is_raw_partition,
				&srv_n_data_files,
				&srv_auto_extend_last_data_file,
				&srv_last_file_size_max);
910
	if (ret == FALSE) {
unknown's avatar
unknown committed
911 912 913
	  	sql_print_error(
			"InnoDB: syntax error in innodb_data_file_path");
	  	DBUG_RETURN(TRUE);
unknown's avatar
unknown committed
914
	}
915

unknown's avatar
unknown committed
916 917 918 919 920 921 922
	/* -------------- Log files ---------------------------*/

	/* The default dir for log files is the datadir of MySQL */
	
	if (!innobase_log_group_home_dir) {
	  	innobase_log_group_home_dir = default_path;
	}
unknown's avatar
unknown committed
923 924

#ifdef UNIV_LOG_ARCHIVE	  	
unknown's avatar
unknown committed
925 926 927 928 929 930 931
	/* Since innodb_log_arch_dir has no relevance under MySQL,
	starting from 4.0.6 we always set it the same as
	innodb_log_group_home_dir: */

	innobase_log_arch_dir = innobase_log_group_home_dir;

	srv_arch_dir = innobase_log_arch_dir;
unknown's avatar
unknown committed
932
#endif /* UNIG_LOG_ARCHIVE */
unknown's avatar
unknown committed
933

unknown's avatar
unknown committed
934 935 936
	ret = (bool)
		srv_parse_log_group_home_dirs(innobase_log_group_home_dir,
						&srv_log_group_home_dirs);
unknown's avatar
unknown committed
937

unknown's avatar
unknown committed
938 939 940 941
	if (ret == FALSE || innobase_mirrored_log_groups != 1) {
		fprintf(stderr,
		"InnoDB: syntax error in innodb_log_group_home_dir\n"
		"InnoDB: or a wrong number of mirrored log groups\n");
unknown's avatar
unknown committed
942

unknown's avatar
unknown committed
943
		DBUG_RETURN(TRUE);
unknown's avatar
unknown committed
944
	}
unknown's avatar
unknown committed
945

unknown's avatar
unknown committed
946 947 948
	/* --------------------------------------------------*/

	srv_file_flush_method_str = innobase_unix_file_flush_method;
949

unknown's avatar
unknown committed
950
	srv_n_log_groups = (ulint) innobase_mirrored_log_groups;
951
	srv_n_log_files = (ulint) innobase_log_files_in_group;
unknown's avatar
unknown committed
952 953
	srv_log_file_size = (ulint) innobase_log_file_size;

unknown's avatar
unknown committed
954
#ifdef UNIV_LOG_ARCHIVE
unknown's avatar
unknown committed
955
	srv_log_archive_on = (ulint) innobase_log_archive;
unknown's avatar
unknown committed
956
#endif /* UNIV_LOG_ARCHIVE */
unknown's avatar
unknown committed
957
	srv_log_buffer_size = (ulint) innobase_log_buffer_size;
unknown's avatar
unknown committed
958
	srv_flush_log_at_trx_commit = (ulint) innobase_flush_log_at_trx_commit;
959

unknown's avatar
unknown committed
960 961
        /* We set srv_pool_size here in units of 1 kB. InnoDB internally
        changes the value so that it becomes the number of database pages. */
unknown's avatar
unknown committed
962 963

        if (innobase_buffer_pool_awe_mem_mb == 0) {
unknown's avatar
unknown committed
964 965 966 967 968 969 970 971 972 973 974 975 976 977 978
                /* Careful here: we first convert the signed long int to ulint
                and only after that divide */
 
                srv_pool_size = ((ulint) innobase_buffer_pool_size) / 1024;
        } else {
                srv_use_awe = TRUE;
                srv_pool_size = (ulint)
                                (1024 * innobase_buffer_pool_awe_mem_mb);
                srv_awe_window_size = (ulint) innobase_buffer_pool_size;
 
                /* Note that what the user specified as
                innodb_buffer_pool_size is actually the AWE memory window
                size in this case, and the real buffer pool size is
                determined by .._awe_mem_mb. */
        }
unknown's avatar
unknown committed
979

unknown's avatar
unknown committed
980 981 982
	srv_mem_pool_size = (ulint) innobase_additional_mem_pool_size;

	srv_n_file_io_threads = (ulint) innobase_file_io_threads;
983

984
	srv_lock_wait_timeout = (ulint) innobase_lock_wait_timeout;
unknown's avatar
Merge  
unknown committed
985 986 987 988
	srv_thread_concurrency = (ulint) innobase_thread_concurrency;
	srv_force_recovery = (ulint) innobase_force_recovery;

	srv_fast_shutdown = (ibool) innobase_fast_shutdown;
989

unknown's avatar
unknown committed
990
	srv_file_per_table = (ibool) innobase_file_per_table;
991
        srv_locks_unsafe_for_binlog = (ibool) innobase_locks_unsafe_for_binlog;
unknown's avatar
unknown committed
992 993

	srv_max_n_open_files = (ulint) innobase_open_files;
994
	srv_innodb_status = (ibool) innobase_create_status_file;
unknown's avatar
unknown committed
995

996
	srv_print_verbose_log = mysqld_embedded ? 0 : 1;
unknown's avatar
unknown committed
997

998 999 1000 1001 1002 1003 1004 1005
		/* Store the default charset-collation number of this MySQL
	installation */

	data_mysql_default_charset_coll = (ulint)default_charset_info->number;

	data_mysql_latin1_swedish_charset_coll =
					(ulint)my_charset_latin1.number;

unknown's avatar
unknown committed
1006 1007 1008 1009
	/* Store the latin1_swedish_ci character ordering table to InnoDB. For
	non-latin1_swedish_ci charsets we use the MySQL comparison functions,
	and consequently we do not need to know the ordering internally in
	InnoDB. */
unknown's avatar
unknown committed
1010

1011
	ut_a(0 == strcmp((char*)my_charset_latin1.name,
unknown's avatar
unknown committed
1012 1013
						(char*)"latin1_swedish_ci"));
	memcpy(srv_latin1_ordering, my_charset_latin1.sort_order, 256);
1014

1015 1016 1017 1018 1019 1020 1021 1022
	/* Since we in this module access directly the fields of a trx
        struct, and due to different headers and flags it might happen that
	mutex_t has a different size in this module and in InnoDB
	modules, we check at run time that the size is the same in
	these compilation modules. */

	srv_sizeof_trx_t_in_ha_innodb_cc = sizeof(trx_t);

unknown's avatar
unknown committed
1023
	err = innobase_start_or_create_for_mysql();
1024 1025 1026

	if (err != DB_SUCCESS) {

unknown's avatar
unknown committed
1027
		DBUG_RETURN(1);
1028
	}
unknown's avatar
unknown committed
1029 1030 1031 1032

	(void) hash_init(&innobase_open_tables,system_charset_info, 32, 0, 0,
			 		(hash_get_key) innobase_get_key, 0, 0);
	pthread_mutex_init(&innobase_mutex, MY_MUTEX_INIT_FAST);
1033
	innodb_inited= 1;
unknown's avatar
unknown committed
1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049

	/* If this is a replication slave and we needed to do a crash recovery,
	set the master binlog position to what InnoDB internally knew about
	how far we got transactions durable inside InnoDB. There is a
	problem here: if the user used also MyISAM tables, InnoDB might not
	know the right position for them.

	THIS DOES NOT WORK CURRENTLY because replication seems to initialize
	glob_mi also after innobase_init. */
	
/*	if (trx_sys_mysql_master_log_pos != -1) {
		ut_memcpy(glob_mi.log_file_name, trx_sys_mysql_master_log_name,
				1 + ut_strlen(trx_sys_mysql_master_log_name));
		glob_mi.pos = trx_sys_mysql_master_log_pos;
	}
*/
unknown's avatar
unknown committed
1050
  	DBUG_RETURN(0);
1051 1052 1053
}

/***********************************************************************
1054
Closes an InnoDB database. */
1055

1056
bool
1057 1058
innobase_end(void)
/*==============*/
1059
				/* out: TRUE if error */
1060
{
1061
	int	err= 0;
1062 1063 1064

	DBUG_ENTER("innobase_end");

1065 1066 1067 1068 1069
#ifdef __NETWARE__ 	/* some special cleanup for NetWare */
	if (nw_panic) {
		set_panic_flag_for_netware();
	}
#endif
1070 1071
	if (innodb_inited)
	{
1072 1073 1074 1075 1076 1077 1078 1079 1080
	  if (innobase_very_fast_shutdown) {
	    srv_very_fast_shutdown = TRUE;
	    fprintf(stderr,
"InnoDB: MySQL has requested a very fast shutdown without flushing\n"
"InnoDB: the InnoDB buffer pool to data files. At the next mysqld startup\n"
"InnoDB: InnoDB will do a crash recovery!\n");

	  }

1081 1082 1083 1084 1085 1086
	  innodb_inited= 0;
	  if (innobase_shutdown_for_mysql() != DB_SUCCESS)
	    err= 1;
	  hash_free(&innobase_open_tables);
	  my_free(internal_innobase_data_file_path,MYF(MY_ALLOW_ZERO_PTR));
	  pthread_mutex_destroy(&innobase_mutex);
1087
	}
1088

1089
  	DBUG_RETURN(err);
1090 1091 1092
}

/********************************************************************
unknown's avatar
unknown committed
1093 1094
Flushes InnoDB logs to disk and makes a checkpoint. Really, a commit flushes
the logs, and the name of this function should be innobase_checkpoint. */
1095

1096
bool
1097 1098
innobase_flush_logs(void)
/*=====================*/
1099
				/* out: TRUE if error */
1100
{
1101
  	bool 	result = 0;
1102 1103 1104

  	DBUG_ENTER("innobase_flush_logs");

unknown's avatar
unknown committed
1105
	log_buffer_flush_to_disk();
1106

1107 1108 1109 1110
  	DBUG_RETURN(result);
}

/*********************************************************************
1111
Commits a transaction in an InnoDB database. */
1112

unknown's avatar
unknown committed
1113 1114 1115 1116 1117
void
innobase_commit_low(
/*================*/
	trx_t*	trx)	/* in: transaction handle */
{
unknown's avatar
unknown committed
1118
        if (trx->conc_state == TRX_NOT_STARTED) {
unknown's avatar
unknown committed
1119

unknown's avatar
unknown committed
1120 1121
                return;
        }
unknown's avatar
unknown committed
1122

unknown's avatar
unknown committed
1123
#ifdef HAVE_REPLICATION
1124 1125
        if (current_thd->slave_thread) {
                /* Update the replication position info inside InnoDB */
unknown's avatar
unknown committed
1126

1127
                trx->mysql_master_log_file_name
1128
                                        = active_mi->rli.group_master_log_name;
unknown's avatar
unknown committed
1129 1130
                trx->mysql_master_log_pos= ((ib_longlong)
                   			    active_mi->rli.future_group_master_log_pos);
1131
        }
unknown's avatar
SCRUM  
unknown committed
1132
#endif /* HAVE_REPLICATION */
unknown's avatar
unknown committed
1133

unknown's avatar
unknown committed
1134
	trx_commit_for_mysql(trx);
unknown's avatar
unknown committed
1135 1136
}

1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178
/*********************************************************************
Creates an InnoDB transaction struct for the thd if it does not yet have one.
Starts a new InnoDB transaction if a transaction is not yet started. And
assigns a new snapshot for a consistent read if the transaction does not yet
have one. */

int
innobase_start_trx_and_assign_read_view(
/*====================================*/
			/* out: 0 */
	THD*	thd)	/* in: MySQL thread handle of the user for whom
			the transaction should be started */
{
	trx_t*	trx;

  	DBUG_ENTER("innobase_start_trx_and_assign_read_view");

	/* Create a new trx struct for thd, if it does not yet have one */

	trx = check_trx_exists(thd);

	/* This is just to play safe: release a possible FIFO ticket and
	search latch. Since we will reserve the kernel mutex, we have to
	release the search system latch first to obey the latching order. */

	innobase_release_stat_resources(trx);

	/* If the transaction is not started yet, start it */

	trx_start_if_not_started_noninline(trx);

	/* Assign a read view if the transaction does not have it yet */

	trx_assign_read_view(trx);

	/* Set the MySQL flag to mark that there is an active transaction.
	The flag is set on the same thd for which the trx struct was looked
	up above, rather than on a separately fetched current_thd, so the
	flag and the transaction always belong to the same thread handle. */

	thd->transaction.all.innodb_active_trans = 1;

	DBUG_RETURN(0);
}

unknown's avatar
unknown committed
1179
/*********************************************************************
unknown's avatar
unknown committed
1180 1181
Commits a transaction in an InnoDB database or marks an SQL statement
ended. */
unknown's avatar
unknown committed
1182

1183 1184 1185
int
innobase_commit(
/*============*/
unknown's avatar
unknown committed
1186
			/* out: 0 */
unknown's avatar
unknown committed
1187
	THD*	thd,	/* in: MySQL thread handle of the user for whom
1188
			the transaction should be committed */
1189 1190
	void*	trx_handle)/* in: InnoDB trx handle or
			&innodb_dummy_stmt_trx_handle: the latter means
unknown's avatar
unknown committed
1191
			that the current SQL statement ended */
1192
{
1193
	trx_t*	trx;
1194 1195 1196 1197

  	DBUG_ENTER("innobase_commit");
  	DBUG_PRINT("trans", ("ending transaction"));

unknown's avatar
unknown committed
1198
	trx = check_trx_exists(thd);
1199

unknown's avatar
unknown committed
1200 1201 1202
	/* Release a possible FIFO ticket and search latch. Since we will
	reserve the kernel mutex, we have to release the search system latch
	first to obey the latching order. */
unknown's avatar
unknown committed
1203

unknown's avatar
unknown committed
1204
	innobase_release_stat_resources(trx);
unknown's avatar
unknown committed
1205

unknown's avatar
unknown committed
1206
	/* The flag thd->transaction.all.innodb_active_trans is set to 1 in
unknown's avatar
unknown committed
1207 1208 1209

	1. ::external_lock(),
	2. ::start_stmt(),
1210
	3. innobase_query_caching_of_table_permitted(),
unknown's avatar
unknown committed
1211
	4. innobase_savepoint(),
1212 1213
	5. ::init_table_handle_for_HANDLER(),
	6. innobase_start_trx_and_assign_read_view()
unknown's avatar
unknown committed
1214 1215 1216 1217 1218

	and it is only set to 0 in a commit or a rollback. If it is 0 we know
	there cannot be resources to be freed and we could return immediately.
	For the time being, we play safe and do the cleanup though there should
	be nothing to clean up. */
unknown's avatar
unknown committed
1219

unknown's avatar
unknown committed
1220 1221 1222 1223 1224 1225
	if (thd->transaction.all.innodb_active_trans == 0
	    && trx->conc_state != TRX_NOT_STARTED) {
	    
	        fprintf(stderr,
"InnoDB: Error: thd->transaction.all.innodb_active_trans == 0\n"
"InnoDB: but trx->conc_state != TRX_NOT_STARTED\n");
unknown's avatar
unknown committed
1226
	}
unknown's avatar
unknown committed
1227 1228 1229 1230

	if (trx_handle != (void*)&innodb_dummy_stmt_trx_handle
	    || (!(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)))) {
	        
unknown's avatar
unknown committed
1231 1232 1233
		/* We were instructed to commit the whole transaction, or
		this is an SQL statement end and autocommit is on */

unknown's avatar
unknown committed
1234
		innobase_commit_low(trx);
unknown's avatar
unknown committed
1235 1236 1237

		thd->transaction.all.innodb_active_trans = 0;
	} else {
unknown's avatar
unknown committed
1238 1239 1240
	        /* We just mark the SQL statement ended and do not do a
		transaction commit */

unknown's avatar
unknown committed
1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251
		if (trx->auto_inc_lock) {
			/* If we had reserved the auto-inc lock for some
			table in this SQL statement we release it now */
		  	
			row_unlock_table_autoinc_for_mysql(trx);
		}
		/* Store the current undo_no of the transaction so that we
		know where to roll back if we have to roll back the next
		SQL statement */

		trx_mark_sql_stat_end(trx);
unknown's avatar
unknown committed
1252
	}
1253

unknown's avatar
unknown committed
1254 1255
	/* Tell the InnoDB server that there might be work for utility
	threads: */
1256 1257 1258

	srv_active_wake_master_thread();

unknown's avatar
unknown committed
1259
	DBUG_RETURN(0);
1260 1261
}

1262 1263 1264 1265
/*********************************************************************
This is called when MySQL writes the binlog entry for the current
transaction. Writes to the InnoDB tablespace info which tells where the
MySQL binlog entry for the current transaction ended. Also commits the
unknown's avatar
unknown committed
1266
transaction inside InnoDB but does NOT flush InnoDB log files to disk.
unknown's avatar
unknown committed
1267
To flush you have to call innobase_commit_complete(). We have separated
unknown's avatar
unknown committed
1268 1269
flushing to eliminate the bottleneck of LOCK_log in log.cc which disabled
InnoDB's group commit capability. */
1270 1271 1272 1273

int
innobase_report_binlog_offset_and_commit(
/*=====================================*/
unknown's avatar
unknown committed
1274
                                /* out: 0 */
1275
        THD*    thd,            /* in: user thread */
unknown's avatar
unknown committed
1276
        void*   trx_handle,     /* in: InnoDB trx handle */
1277 1278
        char*   log_file_name,  /* in: latest binlog file name */
        my_off_t end_offset)    /* in: the offset in the binlog file
unknown's avatar
unknown committed
1279
                                   up to which we wrote */
1280
{
unknown's avatar
unknown committed
1281 1282 1283
	trx_t*	trx;

	trx = (trx_t*)trx_handle;
1284

unknown's avatar
unknown committed
1285 1286
	ut_a(trx != NULL);

unknown's avatar
unknown committed
1287 1288 1289
	trx->mysql_log_file_name = log_file_name;  	
	trx->mysql_log_offset = (ib_longlong)end_offset;
	
unknown's avatar
unknown committed
1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322
	trx->flush_log_later = TRUE;

  	innobase_commit(thd, trx_handle);

	trx->flush_log_later = FALSE;

	return(0);
}

/*********************************************************************
This is called after MySQL has written the binlog entry for the current
transaction. Flushes the InnoDB log files to disk if required. */

int
innobase_commit_complete(
/*=====================*/
                                /* out: 0 */
        void*   trx_handle)     /* in: InnoDB trx handle */
{
	trx_t*	trx;

	if (srv_flush_log_at_trx_commit == 0) {

	        return(0);
	}

	trx = (trx_t*)trx_handle;

	ut_a(trx != NULL);

  	trx_commit_complete_for_mysql(trx);

	return(0);
1323 1324
}

1325
/*********************************************************************
unknown's avatar
unknown committed
1326
Rolls back a transaction or the latest SQL statement. */
1327 1328 1329 1330 1331

int
innobase_rollback(
/*==============*/
			/* out: 0 or error number */
unknown's avatar
unknown committed
1332
	THD*	thd,	/* in: handle to the MySQL thread of the user
1333
			whose transaction should be rolled back */
1334 1335 1336
	void*	trx_handle)/* in: InnoDB trx handle or a dummy stmt handle;
			the latter means we roll back the latest SQL
			statement */
1337 1338
{
	int	error = 0;
1339
	trx_t*	trx;
1340

1341 1342 1343
	DBUG_ENTER("innobase_rollback");
	DBUG_PRINT("trans", ("aborting transaction"));

1344
	trx = check_trx_exists(thd);
1345

unknown's avatar
unknown committed
1346 1347 1348 1349 1350 1351
	/* Release a possible FIFO ticket and search latch. Since we will
	reserve the kernel mutex, we have to release the search system latch
	first to obey the latching order. */

	innobase_release_stat_resources(trx);

unknown's avatar
unknown committed
1352
        if (trx->auto_inc_lock) {
unknown's avatar
unknown committed
1353 1354 1355 1356
		/* If we had reserved the auto-inc lock for some table (if
		we come here to roll back the latest SQL statement) we
		release it now before a possibly lengthy rollback */
		
unknown's avatar
unknown committed
1357 1358 1359
		row_unlock_table_autoinc_for_mysql(trx);
	}

unknown's avatar
unknown committed
1360 1361
	if (trx_handle != (void*)&innodb_dummy_stmt_trx_handle
	    || (!(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)))) {
unknown's avatar
Merge  
unknown committed
1362

1363
		error = trx_rollback_for_mysql(trx);
unknown's avatar
unknown committed
1364
		thd->transaction.all.innodb_active_trans = 0;
unknown's avatar
unknown committed
1365
	} else {
1366
		error = trx_rollback_last_sql_stat_for_mysql(trx);
unknown's avatar
unknown committed
1367
	}
1368

unknown's avatar
unknown committed
1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394
	DBUG_RETURN(convert_error_code_to_mysql(error, NULL));
}

/*********************************************************************
Rolls back a transaction to a savepoint. */

int
innobase_rollback_to_savepoint(
/*===========================*/
				/* out: 0 if success, HA_ERR_NO_SAVEPOINT if
				no savepoint with the given name */
	THD*	thd,		/* in: handle to the MySQL thread of the user
				whose transaction should be rolled back */
	char*	savepoint_name,	/* in: savepoint name */
	my_off_t* binlog_cache_pos)/* out: position which corresponds to the
				savepoint in the binlog cache of this
				transaction, not defined if error */
{
	/* Start from a known value: the position is copied to the caller
	unconditionally below, and the InnoDB call presumably leaves it
	untouched when it fails (the out-parameter is documented as "not
	defined if error"). Without the initializer an indeterminate value
	would be read in that case. */

	ib_longlong mysql_binlog_cache_pos = 0;
	int	    error = 0;
	trx_t*	    trx;

	DBUG_ENTER("innobase_rollback_to_savepoint");

	trx = check_trx_exists(thd);

	/* Release a possible FIFO ticket and search latch. Since we will
	reserve the kernel mutex, we have to release the search system latch
	first to obey the latching order. */

	innobase_release_stat_resources(trx);

	error = trx_rollback_to_savepoint_for_mysql(trx, savepoint_name,
						&mysql_binlog_cache_pos);
	*binlog_cache_pos = (my_off_t)mysql_binlog_cache_pos;

	DBUG_RETURN(convert_error_code_to_mysql(error, NULL));
}

/*********************************************************************
Sets a transaction savepoint. */

int
innobase_savepoint(
/*===============*/
				/* out: always 0, that is, always succeeds */
	THD*	thd,		/* in: handle to the MySQL thread */
	char*	savepoint_name,	/* in: savepoint name */
	my_off_t binlog_cache_pos)/* in: offset up to which the current
				transaction has cached log entries to its
				binlog cache, not defined if no transaction
				active, or we are in the autocommit state, or
				binlogging is not switched on */
{
	trx_t*	trx;
	int	innodb_err = 0;
	bool	autocommit_state;

	DBUG_ENTER("innobase_savepoint");

	autocommit_state =
		!(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN));

	if (autocommit_state) {
		/* In the autocommit state there is no sense to set a
		savepoint: we return immediate success */
	        DBUG_RETURN(0);
	}

	trx = check_trx_exists(thd);

	/* Release a possible FIFO ticket and search latch. Since we will
	reserve the kernel mutex, we have to release the search system latch
	first to obey the latching order. */

	innobase_release_stat_resources(trx);

	/* Setting a savepoint starts a transaction inside InnoDB since
	it allocates resources for it (memory to store the savepoint name,
	for example) */

	thd->transaction.all.innodb_active_trans = 1;

	innodb_err = trx_savepoint_for_mysql(trx, savepoint_name,
					     (ib_longlong)binlog_cache_pos);

	DBUG_RETURN(convert_error_code_to_mysql(innodb_err, NULL));
}

1454
/*********************************************************************
unknown's avatar
unknown committed
1455
Frees a possible InnoDB trx object associated with the current THD. */
1456 1457 1458 1459

int
innobase_close_connection(
/*======================*/
unknown's avatar
unknown committed
1460 1461 1462
			/* out: 0 or error number */
	THD*	thd)	/* in: handle to the MySQL thread of the user
			whose transaction should be rolled back */
1463
{
unknown's avatar
unknown committed
1464
	trx_t*	trx;
unknown's avatar
unknown committed
1465

unknown's avatar
unknown committed
1466
	trx = (trx_t*)thd->transaction.all.innobase_tid;
1467

unknown's avatar
unknown committed
1468 1469
	if (NULL != trx) {
	        innobase_rollback(thd, (void*)trx);
unknown's avatar
unknown committed
1470

unknown's avatar
unknown committed
1471
		trx_free_for_mysql(trx);
unknown's avatar
unknown committed
1472

unknown's avatar
unknown committed
1473 1474
		thd->transaction.all.innobase_tid = NULL;
	}
unknown's avatar
unknown committed
1475

unknown's avatar
unknown committed
1476
	return(0);
1477
}
1478 1479 1480


/*****************************************************************************
1481
** InnoDB database tables
1482 1483 1484
*****************************************************************************/

/********************************************************************
1485 1486 1487
This function is not relevant since we store the tables and indexes
into our own tablespace, not as files, whose extension this function would
give. */
1488 1489 1490 1491

const char**
ha_innobase::bas_ext() const
/*========================*/
1492 1493
				/* out: file extension strings, currently not
				used */
1494
{
1495
	static const char* ext[] = {".InnoDB", NullS};
1496

1497 1498 1499
	return(ext);
}

1500 1501 1502
/*********************************************************************
Normalizes a table name string. A normalized name consists of the
database name catenated to '/' and table name. An example:
unknown's avatar
unknown committed
1503 1504
test/mytable. On Windows normalization puts both the database name and the
table name always to lower case. */
1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518
static
void
normalize_table_name(
/*=================*/
	char*		norm_name,	/* out: normalized name as a
					null-terminated string */
	const char*	name)		/* in: table name string */
{
	char*	name_ptr;
	char*	db_ptr;
	char*	ptr;

	/* Scan name from the end */

unknown's avatar
unknown committed
1519
	ptr = strend(name)-1;
1520 1521 1522 1523 1524 1525 1526

	while (ptr >= name && *ptr != '\\' && *ptr != '/') {
		ptr--;
	}

	name_ptr = ptr + 1;

unknown's avatar
unknown committed
1527
	DBUG_ASSERT(ptr > name);
1528 1529

	ptr--;
1530

1531 1532 1533 1534 1535 1536 1537 1538 1539
	while (ptr >= name && *ptr != '\\' && *ptr != '/') {
		ptr--;
	}

	db_ptr = ptr + 1;

	memcpy(norm_name, db_ptr, strlen(name) + 1 - (db_ptr - name));

	norm_name[name_ptr - db_ptr - 1] = '/';
unknown's avatar
unknown committed
1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550

#ifdef __WIN__
	/* Put to lower case */

	ptr = norm_name;

	while (*ptr != '\0') {
	        *ptr = tolower(*ptr);
	        ptr++;
	}
#endif
1551
}
1552

1553
/*********************************************************************
unknown's avatar
unknown committed
1554
Creates and opens a handle to a table which already exists in an InnoDB
1555 1556 1557 1558 1559 1560 1561 1562
database. */

int
ha_innobase::open(
/*==============*/
					/* out: 1 if error, 0 if success */
	const char*	name,		/* in: table name */
	int 		mode,		/* in: not used */
unknown's avatar
unknown committed
1563
	uint 		test_if_locked)	/* in: not used */
1564
{
1565 1566
	dict_table_t*	ib_table;
  	char		norm_name[1000];
1567
	THD*		thd;
1568 1569 1570 1571 1572 1573

	DBUG_ENTER("ha_innobase::open");

	UT_NOT_USED(mode);
	UT_NOT_USED(test_if_locked);

1574
	thd = current_thd;
1575 1576
	normalize_table_name(norm_name, name);

1577 1578
	user_thd = NULL;

unknown's avatar
unknown committed
1579 1580
	last_query_id = (ulong)-1;

unknown's avatar
unknown committed
1581 1582 1583 1584
	if (!(share=get_share(name))) {

		DBUG_RETURN(1);
	}
1585

1586 1587 1588 1589
	/* Create buffers for packing the fields of a record. Why
	table->reclength did not work here? Obviously, because char
	fields when packed actually became 1 byte longer, when we also
	stored the string length as the first byte. */
1590

unknown's avatar
unknown committed
1591
	upd_and_key_val_buff_len = table->reclength + table->max_key_length
1592
							+ MAX_REF_PARTS * 3;
1593
	if (!(mysql_byte*) my_multi_malloc(MYF(MY_WME),
unknown's avatar
unknown committed
1594 1595
				     &upd_buff, upd_and_key_val_buff_len,
				     &key_val_buff, upd_and_key_val_buff_len,
1596
				     NullS)) {
1597
	  	free_share(share);
unknown's avatar
unknown committed
1598

1599
	  	DBUG_RETURN(1);
1600 1601
  	}

1602
	/* Get pointer to a table object in InnoDB dictionary cache */
1603

1604 1605 1606
	ib_table = dict_table_get_and_increment_handle_count(
				      		     norm_name, NULL);
 	if (NULL == ib_table) {
unknown's avatar
unknown committed
1607 1608 1609 1610 1611 1612 1613 1614 1615
	        ut_print_timestamp(stderr);
	        fprintf(stderr, "  InnoDB error:\n"
"Cannot find table %s from the internal data dictionary\n"
"of InnoDB though the .frm file for the table exists. Maybe you\n"
"have deleted and recreated InnoDB data files but have forgotten\n"
"to delete the corresponding .frm files of InnoDB tables, or you\n"
"have moved .frm files to another database?\n"
"Look from section 15.1 of http://www.innodb.com/ibman.html\n"
"how you can resolve the problem.\n",
1616
			  norm_name);
unknown's avatar
unknown committed
1617 1618 1619
	        free_share(share);
    		my_free((char*) upd_buff, MYF(0));
    		my_errno = ENOENT;
1620

unknown's avatar
unknown committed
1621 1622 1623
    		DBUG_RETURN(1);
  	}

1624
 	if (ib_table->ibd_file_missing && !thd->tablespace_op) {
unknown's avatar
unknown committed
1625 1626 1627 1628 1629 1630 1631 1632 1633
	        ut_print_timestamp(stderr);
	        fprintf(stderr, "  InnoDB error:\n"
"MySQL is trying to open a table handle but the .ibd file for\n"
"table %s does not exist.\n"
"Have you deleted the .ibd file from the database directory under\n"
"the MySQL datadir, or have you used DISCARD TABLESPACE?\n"
"Look from section 15.1 of http://www.innodb.com/ibman.html\n"
"how you can resolve the problem.\n",
			  norm_name);
1634
	        free_share(share);
1635
    		my_free((char*) upd_buff, MYF(0));
1636
    		my_errno = ENOENT;
unknown's avatar
unknown committed
1637

1638 1639 1640
    		DBUG_RETURN(1);
  	}

1641
	innobase_prebuilt = row_create_prebuilt(ib_table);
1642

1643
	((row_prebuilt_t*)innobase_prebuilt)->mysql_row_len = table->reclength;
1644

unknown's avatar
unknown committed
1645 1646 1647 1648
	/* Looks like MySQL-3.23 sometimes has primary key number != 0 */

 	primary_key = table->primary_key;
	key_used_on_scan = primary_key;
1649

unknown's avatar
unknown committed
1650 1651 1652 1653 1654
	/* Allocate a buffer for a 'row reference'. A row reference is
	a string of bytes of length ref_length which uniquely specifies
        a row in our table. Note that MySQL may also compare two row
        references for equality by doing a simple memcmp on the strings
        of length ref_length! */
1655

unknown's avatar
unknown committed
1656
  	if (!row_table_got_default_clust_index(ib_table)) {
unknown's avatar
unknown committed
1657 1658 1659 1660 1661
	        if (primary_key >= MAX_KEY) {
	                fprintf(stderr,
		    "InnoDB: Error: table %s has a primary key in InnoDB\n"
		    "InnoDB: data dictionary, but not in MySQL!\n", name);
		}
1662 1663 1664

		((row_prebuilt_t*)innobase_prebuilt)
				->clust_index_was_generated = FALSE;
unknown's avatar
unknown committed
1665
 		/*
unknown's avatar
unknown committed
1666 1667 1668 1669 1670
		  MySQL allocates the buffer for ref. key_info->key_length
		  includes space for all key columns + one byte for each column
		  that may be NULL. ref_length must be as exact as possible to
		  save space, because all row reference buffers are allocated
		  based on ref_length.
unknown's avatar
unknown committed
1671
		*/
unknown's avatar
unknown committed
1672
 
unknown's avatar
unknown committed
1673
  		ref_length = table->key_info[primary_key].key_length;
1674
	} else {
unknown's avatar
unknown committed
1675 1676 1677
	        if (primary_key != MAX_KEY) {
	                fprintf(stderr,
		    "InnoDB: Error: table %s has no primary key in InnoDB\n"
unknown's avatar
unknown committed
1678 1679 1680 1681 1682 1683 1684 1685
		    "InnoDB: data dictionary, but has one in MySQL!\n"
		    "InnoDB: If you created the table with a MySQL\n"
                    "InnoDB: version < 3.23.54 and did not define a primary\n"
                    "InnoDB: key, but defined a unique key with all non-NULL\n"
                    "InnoDB: columns, then MySQL internally treats that key\n"
                    "InnoDB: as the primary key. You can fix this error by\n"
		    "InnoDB: dump + DROP + CREATE + reimport of the table.\n",
				name);
unknown's avatar
unknown committed
1686 1687
		}

1688 1689 1690
		((row_prebuilt_t*)innobase_prebuilt)
				->clust_index_was_generated = TRUE;

unknown's avatar
unknown committed
1691
  		ref_length = DATA_ROW_ID_LEN;
unknown's avatar
unknown committed
1692

unknown's avatar
unknown committed
1693 1694 1695 1696 1697 1698 1699 1700 1701
		/*
		  If we automatically created the clustered index, then
		  MySQL does not know about it, and MySQL must NOT be aware
		  of the index used on scan, to make it avoid checking if we
		  update the column of the index. That is why we assert below
		  that key_used_on_scan is the undefined value MAX_KEY.
		  The column is the row id in the automatical generation case,
		  and it will never be updated anyway.
		*/
unknown's avatar
unknown committed
1702 1703 1704 1705 1706
	       
		if (key_used_on_scan != MAX_KEY) {
	                fprintf(stderr,
"InnoDB: Warning: table %s key_used_on_scan is %lu even though there is no\n"
"InnoDB: primary key inside InnoDB.\n",
1707
				name, (ulong)key_used_on_scan);
unknown's avatar
unknown committed
1708
		}
1709
	}
1710

unknown's avatar
Merge  
unknown committed
1711 1712
	auto_inc_counter_for_this_stat = 0;

unknown's avatar
unknown committed
1713 1714 1715
	block_size = 16 * 1024;	/* Index block size in InnoDB: used by MySQL
				in query optimization */

unknown's avatar
Merge  
unknown committed
1716
	/* Init table lock structure */
1717
	thr_lock_data_init(&share->lock,&lock,(void*) 0);
1718 1719

  	info(HA_STATUS_NO_LOCK | HA_STATUS_VARIABLE | HA_STATUS_CONST);
1720

1721 1722 1723 1724
  	DBUG_RETURN(0);
}

/**********************************************************************
1725
Closes a handle to an InnoDB table. */
1726 1727 1728 1729 1730 1731 1732 1733 1734 1735

int
ha_innobase::close(void)
/*====================*/
				/* out: error number */
{
  	DBUG_ENTER("ha_innobase::close");

	row_prebuilt_free((row_prebuilt_t*) innobase_prebuilt);

1736
    	my_free((char*) upd_buff, MYF(0));
1737 1738
        free_share(share);

1739
	/* Tell InnoDB server that there might be work for
1740 1741 1742 1743 1744 1745 1746
	utility threads: */

	srv_active_wake_master_thread();

  	DBUG_RETURN(0);
}

unknown's avatar
unknown committed
1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822
/* The following accessor functions should really be inside MySQL code! */

/******************************************************************
Gets the offset of a field in a row of a table. The offset is computed as
the distance of field->ptr from the start of table->record[0]. */
inline
uint
get_field_offset(
/*=============*/
			/* out: offset */
	TABLE*	table,	/* in: MySQL table object */
	Field*	field)	/* in: MySQL field object */
{
	char*	row_start = (char*) table->record[0];

	return((uint) (field->ptr - row_start));
}

/******************************************************************
Checks if a field in a record is SQL NULL. Uses the record format
information in table to track the null bit in record. A field that has no
null_ptr is never reported as NULL. */
inline
uint
field_in_record_is_null(
/*====================*/
			/* out: 1 if NULL, 0 otherwise */
	TABLE*	table,	/* in: MySQL table object */
	Field*	field,	/* in: MySQL field object */
	char*	record)	/* in: a row in MySQL format */
{
	int	null_offset;

	if (field->null_ptr) {
		/* The null byte is at the same offset in record as
		field->null_ptr is in table->record[0] */

		null_offset = (uint) ((char*) field->null_ptr
					- (char*) table->record[0]);

		return((record[null_offset] & field->null_bit) ? 1 : 0);
	}

	return(0);
}

/******************************************************************
Sets a field in a record to SQL NULL. Uses the record format
information in table to track the null bit in record. The function does
not check field->null_ptr before using it. */
inline
void
set_field_in_record_to_null(
/*========================*/
	TABLE*	table,	/* in: MySQL table object */
	Field*	field,	/* in: MySQL field object */
	char*	record)	/* in/out: a row in MySQL format */
{
	int	null_offset = (uint) ((char*) field->null_ptr
					- (char*) table->record[0]);

	/* Turn on the null bit of the field; the other bits of the
	byte are left as they are */

	record[null_offset] |= field->null_bit;
}

/******************************************************************
Resets SQL NULL bits in a record to zero: clears the first
table->null_bytes bytes of the record. */
inline
void
reset_null_bits(
/*============*/
	TABLE*	table,	/* in: MySQL table object */
	char*	record)	/* in/out: a row in MySQL format */
{
	memset(record, 0, table->null_bytes);
}

1823 1824
extern "C" {
/*****************************************************************
unknown's avatar
unknown committed
1825 1826 1827 1828
InnoDB uses this function to compare two data fields for which the data type
is such that we must use MySQL code to compare them. NOTE that the prototype
of this function is in rem0cmp.c in InnoDB source code! If you change this
function, remember to update the prototype there! */
1829 1830 1831

int
innobase_mysql_cmp(
1832
/*===============*/
1833 1834
					/* out: 1, 0, -1, if a is greater,
					equal, less than b, respectively */
1835
	int		mysql_type,	/* in: MySQL type */
unknown's avatar
unknown committed
1836
	uint		charset_number,	/* in: number of the charset */
1837 1838 1839 1840 1841 1842 1843
	unsigned char*	a,		/* in: data field */
	unsigned int	a_length,	/* in: data field length,
					not UNIV_SQL_NULL */
	unsigned char*	b,		/* in: data field */
	unsigned int	b_length)	/* in: data field length,
					not UNIV_SQL_NULL */
{
unknown's avatar
unknown committed
1844
	CHARSET_INFO*		charset;
1845
	enum_field_types	mysql_tp;
1846
	int                     ret;
1847

unknown's avatar
unknown committed
1848 1849
	DBUG_ASSERT(a_length != UNIV_SQL_NULL);
	DBUG_ASSERT(b_length != UNIV_SQL_NULL);
1850 1851 1852 1853 1854 1855 1856

	mysql_tp = (enum_field_types) mysql_type;

	switch (mysql_tp) {

	case FIELD_TYPE_STRING:
	case FIELD_TYPE_VAR_STRING:
unknown's avatar
unknown committed
1857 1858 1859 1860
	case FIELD_TYPE_TINY_BLOB:
	case FIELD_TYPE_MEDIUM_BLOB:
	case FIELD_TYPE_BLOB:
	case FIELD_TYPE_LONG_BLOB:
unknown's avatar
unknown committed
1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880
		/* Use the charset number to pick the right charset struct for
		the comparison. Since the MySQL function get_charset may be
		slow before Bar removes the mutex operation there, we first
		look at 2 common charsets directly. */

		if (charset_number == default_charset_info->number) {
			charset = default_charset_info;
		} else if (charset_number == my_charset_latin1.number) {
			charset = &my_charset_latin1;
		} else {
			charset = get_charset(charset_number, MYF(MY_WME));

			if (charset == NULL) {
				fprintf(stderr,
"InnoDB: fatal error: InnoDB needs charset %lu for doing a comparison,\n"
"InnoDB: but MySQL cannot find that charset.\n", (ulong)charset_number);
				ut_a(0);
			}
		}

unknown's avatar
unknown committed
1881 1882 1883 1884 1885 1886
                /* Starting from 4.1.3, we use strnncollsp() in comparisons of
                non-latin1_swedish_ci strings. NOTE that the collation order
                changes then: 'b\0\0...' is ordered BEFORE 'b  ...'. Users
                having indexes on such data need to rebuild their tables! */

                ret = charset->coll->strnncollsp(charset,
unknown's avatar
 
unknown committed
1887 1888
                                  a, a_length,
                                  b, b_length);
1889
		if (ret < 0) {
1890
		        return(-1);
1891
		} else if (ret > 0) {
1892
		        return(1);
1893
		} else {
1894
		        return(0);
1895
	        }
1896 1897 1898 1899 1900 1901 1902 1903 1904
	default:
		assert(0);
	}

	return(0);
}
}

/******************************************************************
1905
Converts a MySQL type to an InnoDB type. */
1906 1907
inline
ulint
1908 1909 1910
get_innobase_type_from_mysql_type(
/*==============================*/
			/* out: DATA_BINARY, DATA_VARCHAR, ... */
1911 1912
	Field*	field)	/* in: MySQL field */
{
unknown's avatar
unknown committed
1913 1914 1915
	/* The following asserts try to check that the MySQL type code fits in
	8 bits: this is used in ibuf and also when DATA_NOT_NULL is ORed to
	the type */
1916

unknown's avatar
unknown committed
1917 1918 1919 1920 1921
	DBUG_ASSERT((ulint)FIELD_TYPE_STRING < 256);
	DBUG_ASSERT((ulint)FIELD_TYPE_VAR_STRING < 256);
	DBUG_ASSERT((ulint)FIELD_TYPE_DOUBLE < 256);
	DBUG_ASSERT((ulint)FIELD_TYPE_FLOAT < 256);
	DBUG_ASSERT((ulint)FIELD_TYPE_DECIMAL < 256);
1922 1923

	switch (field->type()) {
unknown's avatar
unknown committed
1924 1925
	        /* NOTE that we only allow string types in DATA_MYSQL
		and DATA_VARMYSQL */
1926
		case FIELD_TYPE_VAR_STRING: if (field->binary()) {
1927 1928 1929

						return(DATA_BINARY);
					} else if (strcmp(
unknown's avatar
unknown committed
1930 1931
						  field->charset()->name,
						 "latin1_swedish_ci") == 0) {
1932
						return(DATA_VARCHAR);
1933 1934
					} else {
						return(DATA_VARMYSQL);
1935
					}
1936
		case FIELD_TYPE_STRING: if (field->binary()) {
1937 1938 1939

						return(DATA_FIXBINARY);
					} else if (strcmp(
unknown's avatar
unknown committed
1940 1941
						   field->charset()->name,
						   "latin1_swedish_ci") == 0) {
1942
						return(DATA_CHAR);
1943 1944
					} else {
						return(DATA_MYSQL);
1945
					}
1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956
		case FIELD_TYPE_LONG:
		case FIELD_TYPE_LONGLONG:
		case FIELD_TYPE_TINY:
		case FIELD_TYPE_SHORT:
		case FIELD_TYPE_INT24:
		case FIELD_TYPE_DATE:
		case FIELD_TYPE_DATETIME:
		case FIELD_TYPE_YEAR:
		case FIELD_TYPE_NEWDATE:
		case FIELD_TYPE_ENUM:
		case FIELD_TYPE_SET:
1957 1958 1959
		case FIELD_TYPE_TIME:
		case FIELD_TYPE_TIMESTAMP:
					return(DATA_INT);
1960
		case FIELD_TYPE_FLOAT:
1961
					return(DATA_FLOAT);
1962
		case FIELD_TYPE_DOUBLE:
1963
					return(DATA_DOUBLE);
1964
		case FIELD_TYPE_DECIMAL:
1965 1966 1967 1968 1969 1970
					return(DATA_DECIMAL);
		case FIELD_TYPE_TINY_BLOB:
		case FIELD_TYPE_MEDIUM_BLOB:
		case FIELD_TYPE_BLOB:
		case FIELD_TYPE_LONG_BLOB:
					return(DATA_BLOB);
1971 1972 1973 1974 1975 1976
		default:
					assert(0);
	}

	return(0);
}
1977

1978
/***********************************************************************
1979
Stores a key value for a row to a buffer. */
1980 1981 1982 1983 1984 1985 1986

uint
ha_innobase::store_key_val_for_row(
/*===============================*/
				/* out: key value length as stored in buff */
	uint 		keynr,	/* in: key number */
	char*		buff,	/* in/out: buffer for the key value (in MySQL
1987 1988
				format) */
	uint		buff_len,/* in: buffer length */
1989
	const mysql_byte* record)/* in: row in MySQL format */
1990 1991 1992 1993 1994
{
	KEY*		key_info 	= table->key_info + keynr;
  	KEY_PART_INFO*	key_part	= key_info->key_part;
  	KEY_PART_INFO*	end		= key_part + key_info->key_parts;
	char*		buff_start	= buff;
unknown's avatar
unknown committed
1995 1996 1997 1998 1999
	enum_field_types mysql_type;
	Field*		field;
	ulint		blob_len;
	byte*		blob_data;
	ibool		is_null;
2000

2001 2002
  	DBUG_ENTER("store_key_val_for_row");

unknown's avatar
unknown committed
2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018
	/* The format for storing a key field in MySQL is the following:

	1. If the column can be NULL, then in the first byte we put 1 if the
	field value is NULL, 0 otherwise.

	2. If the column is of a BLOB type (it must be a column prefix field
	in this case), then we put the length of the data in the field to the
	next 2 bytes, in the little-endian format. If the field is SQL NULL,
	then these 2 bytes are set to 0. Note that the length of data in the
	field is <= column prefix length.

	3. In a column prefix field, prefix_len next bytes are reserved for
	data. In a normal field the max field length next bytes are reserved
	for data. For a VARCHAR(n) the max field length is n. If the stored
	value is the SQL NULL then these data bytes are set to 0. */	

2019 2020 2021
	/* We have to zero-fill the buffer so that MySQL is able to use a
	simple memcmp to compare two key values to determine if they are
	equal. MySQL does this to compare contents of two 'ref' values. */
unknown's avatar
unknown committed
2022

2023
	bzero(buff, buff_len);
unknown's avatar
unknown committed
2024

2025
  	for (; key_part != end; key_part++) {
unknown's avatar
unknown committed
2026
	        is_null = FALSE;
2027 2028 2029 2030

    		if (key_part->null_bit) {
      			if (record[key_part->null_offset]
						& key_part->null_bit) {
unknown's avatar
unknown committed
2031 2032 2033 2034 2035 2036
				*buff = 1;
				is_null = TRUE;
      			} else {
				*buff = 0;
			}
			buff++;
2037
    		}
2038

unknown's avatar
unknown committed
2039 2040 2041 2042 2043 2044 2045
		field = key_part->field;
		mysql_type = field->type();

		if (mysql_type == FIELD_TYPE_TINY_BLOB
		    || mysql_type == FIELD_TYPE_MEDIUM_BLOB
		    || mysql_type == FIELD_TYPE_BLOB
		    || mysql_type == FIELD_TYPE_LONG_BLOB) {
2046

2047
			ut_a(key_part->key_part_flag & HA_PART_KEY_SEG);
unknown's avatar
unknown committed
2048 2049 2050 2051 2052 2053 2054 2055 2056

		        if (is_null) {
				 buff += key_part->length + 2;
				 
				 continue;
			}
		    
		        blob_data = row_mysql_read_blob_ref(&blob_len,
				(byte*) (record
unknown's avatar
unknown committed
2057
				+ (ulint)get_field_offset(table, field)),
unknown's avatar
unknown committed
2058 2059
					(ulint) field->pack_length());

unknown's avatar
unknown committed
2060 2061
			ut_a(get_field_offset(table, field)
						     == key_part->offset);
unknown's avatar
unknown committed
2062 2063 2064 2065 2066 2067 2068 2069
			if (blob_len > key_part->length) {
			        blob_len = key_part->length;
			}

			/* MySQL reserves 2 bytes for the length and the
			storage of the number is little-endian */

			ut_a(blob_len < 256);
2070
			*((byte*)buff) = (byte)blob_len;
unknown's avatar
unknown committed
2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085
			buff += 2;

			memcpy(buff, blob_data, blob_len);

			buff += key_part->length;
		} else {
		        if (is_null) {
				 buff += key_part->length;
				 
				 continue;
			}
			memcpy(buff, record + key_part->offset,
							key_part->length);
			buff += key_part->length;
		}
2086 2087
  	}

2088
	ut_a(buff <= buff_start + buff_len);
unknown's avatar
unknown committed
2089 2090

	DBUG_RETURN((uint)(buff - buff_start));
2091 2092 2093
}

/******************************************************************
unknown's avatar
unknown committed
2094 2095
Builds a 'template' to the prebuilt struct. The template is used in fast
retrieval of just those column values MySQL needs in its processing. */
unknown's avatar
unknown committed
2096
static
2097
void
2098 2099 2100 2101 2102 2103 2104 2105 2106
build_template(
/*===========*/
	row_prebuilt_t*	prebuilt,	/* in: prebuilt struct */
	THD*		thd,		/* in: current user thread, used
					only if templ_type is
					ROW_MYSQL_REC_FIELDS */
	TABLE*		table,		/* in: MySQL table */
	ulint		templ_type)	/* in: ROW_MYSQL_WHOLE_ROW or
					ROW_MYSQL_REC_FIELDS */
2107
{
2108 2109
	dict_index_t*	index;
	dict_index_t*	clust_index;
2110
	mysql_row_templ_t* templ;
2111
	Field*		field;
2112 2113
	ulint		n_fields;
	ulint		n_requested_fields	= 0;
unknown's avatar
Merge  
unknown committed
2114
	ibool		fetch_all_in_key	= FALSE;
unknown's avatar
unknown committed
2115
	ibool		fetch_primary_key_cols	= FALSE;
2116
	ulint		i;
2117

unknown's avatar
unknown committed
2118 2119 2120 2121
	if (prebuilt->select_lock_type == LOCK_X) {
		/* We always retrieve the whole clustered index record if we
		use exclusive row level locks, for example, if the read is
		done in an UPDATE statement. */
unknown's avatar
unknown committed
2122

unknown's avatar
unknown committed
2123 2124 2125
	        templ_type = ROW_MYSQL_WHOLE_ROW;
	}

unknown's avatar
unknown committed
2126 2127 2128
	if (templ_type == ROW_MYSQL_REC_FIELDS) {
	     if (prebuilt->hint_need_to_fetch_extra_cols
						== ROW_RETRIEVE_ALL_COLS) {
2129

unknown's avatar
unknown committed
2130 2131
		/* We know we must at least fetch all columns in the key, or
		all columns in the table */
unknown's avatar
unknown committed
2132

unknown's avatar
Merge  
unknown committed
2133
		if (prebuilt->read_just_key) {
unknown's avatar
unknown committed
2134
			/* MySQL has instructed us that it is enough to
2135 2136 2137 2138 2139
			fetch the columns in the key; looks like MySQL
			can set this flag also when there is only a
			prefix of the column in the key: in that case we
			retrieve the whole column from the clustered
			index */
unknown's avatar
unknown committed
2140

unknown's avatar
Merge  
unknown committed
2141 2142 2143 2144
			fetch_all_in_key = TRUE;
		} else {
			templ_type = ROW_MYSQL_WHOLE_ROW;
		}
unknown's avatar
unknown committed
2145 2146
	    } else if (prebuilt->hint_need_to_fetch_extra_cols
						== ROW_RETRIEVE_PRIMARY_KEY) {
unknown's avatar
unknown committed
2147 2148 2149 2150 2151
		/* We must at least fetch all primary key cols. Note that if
		the clustered index was internally generated by InnoDB on the
		row id (no primary key was defined), then
		row_search_for_mysql() will always retrieve the row id to a
		special buffer in the prebuilt struct. */
unknown's avatar
unknown committed
2152 2153 2154

		fetch_primary_key_cols = TRUE;
	    }
2155 2156
	}

unknown's avatar
unknown committed
2157
	clust_index = dict_table_get_first_index_noninline(prebuilt->table);
unknown's avatar
unknown committed
2158

2159
	if (templ_type == ROW_MYSQL_REC_FIELDS) {
unknown's avatar
unknown committed
2160
		index = prebuilt->index;
2161 2162
	} else {
		index = clust_index;
2163
	}
2164

2165 2166 2167 2168 2169 2170 2171
	if (index == clust_index) {
		prebuilt->need_to_access_clustered = TRUE;
	} else {
		prebuilt->need_to_access_clustered = FALSE;
		/* Below we check column by column if we need to access
		the clustered index */
	}
2172

unknown's avatar
unknown committed
2173
	n_fields = (ulint)table->fields; /* number of columns */
2174 2175 2176 2177 2178 2179

	if (!prebuilt->mysql_template) {
		prebuilt->mysql_template = (mysql_row_templ_t*)
						mem_alloc_noninline(
					n_fields * sizeof(mysql_row_templ_t));
	}
2180

2181 2182
	prebuilt->template_type = templ_type;
	prebuilt->null_bitmap_len = table->null_bytes;
2183

2184 2185
	prebuilt->templ_contains_blob = FALSE;

unknown's avatar
unknown committed
2186 2187
	/* Note that in InnoDB, i is the column number. MySQL calls columns
	'fields'. */
2188
	for (i = 0; i < n_fields; i++) {
2189
		templ = prebuilt->mysql_template + n_requested_fields;
2190 2191
		field = table->field[i];

2192
		if (templ_type == ROW_MYSQL_REC_FIELDS
unknown's avatar
unknown committed
2193 2194
		    && !(fetch_all_in_key
			 && dict_index_contains_col_or_prefix(index, i))
unknown's avatar
unknown committed
2195 2196
		    && !(fetch_primary_key_cols
			 && dict_table_col_in_clustered_key(index->table, i))
unknown's avatar
unknown committed
2197 2198 2199
		    && thd->query_id != field->query_id) {

			/* This field is not needed in the query, skip it */
2200 2201 2202 2203 2204

			goto skip_field;
		}

		n_requested_fields++;
2205

2206
		templ->col_no = i;
2207

2208 2209 2210
		if (index == clust_index) {
			templ->rec_field_no = (index->table->cols + i)
								->clust_pos;
2211
		} else {
2212 2213
			templ->rec_field_no = dict_index_get_nth_col_pos(
								index, i);
2214 2215
		}

2216 2217 2218 2219 2220 2221 2222 2223
		if (templ->rec_field_no == ULINT_UNDEFINED) {
			prebuilt->need_to_access_clustered = TRUE;
		}

		if (field->null_ptr) {
			templ->mysql_null_byte_offset =
				(ulint) ((char*) field->null_ptr
					- (char*) table->record[0]);
2224

2225 2226 2227 2228
			templ->mysql_null_bit_mask = (ulint) field->null_bit;
		} else {
			templ->mysql_null_bit_mask = 0;
		}
2229

unknown's avatar
unknown committed
2230 2231 2232
		templ->mysql_col_offset = (ulint)
					get_field_offset(table, field);

2233 2234 2235
		templ->mysql_col_len = (ulint) field->pack_length();
		templ->type = get_innobase_type_from_mysql_type(field);
		templ->is_unsigned = (ulint) (field->flags & UNSIGNED_FLAG);
2236

2237 2238
		if (templ->type == DATA_BLOB) {
			prebuilt->templ_contains_blob = TRUE;
2239
		}
2240 2241 2242
skip_field:
		;
	}
2243

2244
	prebuilt->n_template = n_requested_fields;
2245

unknown's avatar
unknown committed
2246
	if (index != clust_index && prebuilt->need_to_access_clustered) {
2247 2248 2249 2250
		/* Change rec_field_no's to correspond to the clustered index
		record */
		for (i = 0; i < n_requested_fields; i++) {
			templ = prebuilt->mysql_template + i;
2251

2252 2253 2254
			templ->rec_field_no =
			    (index->table->cols + templ->col_no)->clust_pos;
		}
2255
	}
2256 2257 2258
}

/************************************************************************
2259
Stores a row in an InnoDB database, to the table specified in this
2260 2261 2262 2263 2264
handle. */

int
ha_innobase::write_row(
/*===================*/
2265 2266
				/* out: error code */
	mysql_byte* 	record)	/* in: a row in MySQL format */
2267
{
2268
	row_prebuilt_t* prebuilt = (row_prebuilt_t*)innobase_prebuilt;
2269
  	int 		error;
2270
	longlong	auto_inc;
unknown's avatar
unknown committed
2271
	longlong	dummy;
2272 2273
	ibool           incremented_auto_inc_for_stat = FALSE;
	ibool           incremented_auto_inc_counter = FALSE;
unknown's avatar
unknown committed
2274
	ibool           skip_auto_inc_decr;
unknown's avatar
unknown committed
2275

2276
  	DBUG_ENTER("ha_innobase::write_row");
2277

unknown's avatar
unknown committed
2278 2279 2280 2281
	if (prebuilt->trx !=
			(trx_t*) current_thd->transaction.all.innobase_tid) {
		fprintf(stderr,
"InnoDB: Error: the transaction object for the table handle is at\n"
2282 2283 2284 2285 2286 2287 2288 2289 2290
"InnoDB: %p, but for the current thread it is at %p\n",
			prebuilt->trx,
			current_thd->transaction.all.innobase_tid);
		fputs("InnoDB: Dump of 200 bytes around prebuilt: ", stderr);
		ut_print_buf(stderr, ((const byte*)prebuilt) - 100, 200);
		fputs("\n"
			"InnoDB: Dump of 200 bytes around transaction.all: ",
			stderr);
		ut_print_buf(stderr,
unknown's avatar
unknown committed
2291
			((byte*)(&(current_thd->transaction.all))) - 100, 200);
2292 2293
		putc('\n', stderr);
		ut_error;
unknown's avatar
unknown committed
2294
	}
unknown's avatar
unknown committed
2295

2296 2297
  	statistic_increment(ha_write_count, &LOCK_status);

2298 2299
        if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_INSERT)
                table->timestamp_field->set_time();
2300

unknown's avatar
unknown committed
2301 2302 2303
	if (last_query_id != user_thd->query_id) {
	        prebuilt->sql_stat_start = TRUE;
                last_query_id = user_thd->query_id;
unknown's avatar
unknown committed
2304 2305

		innobase_release_stat_resources(prebuilt->trx);
unknown's avatar
unknown committed
2306 2307
	}

2308
  	if (table->next_number_field && record == table->record[0]) {
unknown's avatar
unknown committed
2309 2310
		/* This is the case where the table has an
		auto-increment column */
unknown's avatar
unknown committed
2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335

		/* Initialize the auto-inc counter if it has not been
		initialized yet */

		if (0 == dict_table_autoinc_peek(prebuilt->table)) {

			/* This call initializes the counter */
		        error = innobase_read_and_init_auto_inc(&dummy);

			if (error) {
				/* Deadlock or lock wait timeout */

				goto func_exit;
			}

			/* We have to set sql_stat_start to TRUE because
			the above call probably has called a select, and
			has reset that flag; row_insert_for_mysql has to
			know to set the IX intention lock on the table,
			something it only does at the start of each
			statement */

			prebuilt->sql_stat_start = TRUE;
		}

2336 2337
	        /* Fetch the value the user possibly has set in the
	        autoincrement field */
unknown's avatar
unknown committed
2338

2339 2340
	        auto_inc = table->next_number_field->val_int();

2341 2342 2343
		/* In replication and also otherwise the auto-inc column 
		can be set with SET INSERT_ID. Then we must look at
		user_thd->next_insert_id. If it is nonzero and the user
unknown's avatar
Merge  
unknown committed
2344 2345 2346
		has not supplied a value, we must use it, and use values
		incremented by 1 in all subsequent inserts within the
		same SQL statement! */
2347 2348 2349

		if (auto_inc == 0 && user_thd->next_insert_id != 0) {
		        auto_inc = user_thd->next_insert_id;
unknown's avatar
Merge  
unknown committed
2350
		        auto_inc_counter_for_this_stat = auto_inc;
2351
		}
2352

unknown's avatar
Merge  
unknown committed
2353 2354 2355 2356 2357 2358
		if (auto_inc == 0 && auto_inc_counter_for_this_stat) {
			/* The user set the auto-inc counter for
			this SQL statement with SET INSERT_ID. We must
			assign sequential values from the counter. */

			auto_inc_counter_for_this_stat++;
2359
			incremented_auto_inc_for_stat = TRUE;
unknown's avatar
Merge  
unknown committed
2360 2361 2362 2363 2364 2365 2366

			auto_inc = auto_inc_counter_for_this_stat;

			/* We give MySQL a new value to place in the
			auto-inc column */
			user_thd->next_insert_id = auto_inc;
		}
unknown's avatar
unknown committed
2367

2368
		if (auto_inc != 0) {
unknown's avatar
unknown committed
2369 2370 2371
			/* This call will calculate the max of the current
			value and the value supplied by the user and
			update the counter accordingly */
2372 2373 2374 2375 2376 2377 2378 2379

			/* We have to use the transactional lock mechanism
			on the auto-inc counter of the table to ensure
			that replication and roll-forward of the binlog
			exactly imitates also the given auto-inc values.
			The lock is released at each SQL statement's
			end. */

unknown's avatar
unknown committed
2380
			innodb_srv_conc_enter_innodb(prebuilt->trx);
2381
			error = row_lock_table_autoinc_for_mysql(prebuilt);
unknown's avatar
unknown committed
2382
			innodb_srv_conc_exit_innodb(prebuilt->trx);
2383 2384

			if (error != DB_SUCCESS) {
unknown's avatar
unknown committed
2385

unknown's avatar
unknown committed
2386
				error = convert_error_code_to_mysql(error,
unknown's avatar
unknown committed
2387
								    user_thd);
2388 2389
				goto func_exit;
			}	
unknown's avatar
unknown committed
2390

2391 2392
			dict_table_autoinc_update(prebuilt->table, auto_inc);
		} else {
unknown's avatar
unknown committed
2393
			innodb_srv_conc_enter_innodb(prebuilt->trx);
unknown's avatar
Merge  
unknown committed
2394

2395 2396 2397 2398 2399
			if (!prebuilt->trx->auto_inc_lock) {

				error = row_lock_table_autoinc_for_mysql(
								prebuilt);
				if (error != DB_SUCCESS) {
unknown's avatar
unknown committed
2400 2401
 					innodb_srv_conc_exit_innodb(
							prebuilt->trx);
unknown's avatar
unknown committed
2402

2403
					error = convert_error_code_to_mysql(
unknown's avatar
unknown committed
2404
							error, user_thd);
2405 2406 2407 2408
					goto func_exit;
				}
			}	

2409 2410 2411
			/* The following call gets the value of the auto-inc
			counter of the table and increments it by 1 */

2412
			auto_inc = dict_table_autoinc_get(prebuilt->table);
2413 2414
			incremented_auto_inc_counter = TRUE;

unknown's avatar
unknown committed
2415
			innodb_srv_conc_exit_innodb(prebuilt->trx);
2416

unknown's avatar
unknown committed
2417 2418
			/* We can give the new value for MySQL to place in
			the field */
2419

unknown's avatar
unknown committed
2420
			user_thd->next_insert_id = auto_inc;
2421
		}
unknown's avatar
unknown committed
2422

unknown's avatar
unknown committed
2423 2424 2425
		/* This call of a handler.cc function places
		user_thd->next_insert_id to the column value, if the column
		value was not set by the user */
2426

unknown's avatar
unknown committed
2427 2428
    		update_auto_increment();
	}
2429

2430 2431 2432 2433
	if (prebuilt->mysql_template == NULL
			|| prebuilt->template_type != ROW_MYSQL_WHOLE_ROW) {
		/* Build the template used in converting quickly between
		the two database formats */
2434

2435 2436
		build_template(prebuilt, NULL, table, ROW_MYSQL_WHOLE_ROW);
	}
2437

unknown's avatar
unknown committed
2438
	innodb_srv_conc_enter_innodb(prebuilt->trx);
unknown's avatar
Merge  
unknown committed
2439

2440
	error = row_insert_for_mysql((byte*) record, prebuilt);
2441

unknown's avatar
unknown committed
2442
	innodb_srv_conc_exit_innodb(prebuilt->trx);
unknown's avatar
Merge  
unknown committed
2443

2444 2445 2446
	if (error != DB_SUCCESS) {
	        /* If the insert did not succeed we restore the value of
		the auto-inc counter we used; note that this behavior was
unknown's avatar
unknown committed
2447 2448 2449
		introduced only in version 4.0.4.
		NOTE that a REPLACE command handles a duplicate key error
		itself, and we must not decrement the autoinc counter
unknown's avatar
unknown committed
2450 2451 2452 2453 2454 2455
		if we are performing a REPLACE statement.
		NOTE 2: if there was an error, for example a deadlock,
		which caused InnoDB to roll back the whole transaction
		already in the call of row_insert_for_mysql(), we may no
		longer have the AUTO-INC lock, and cannot decrement
		the counter here. */
unknown's avatar
unknown committed
2456 2457

	        skip_auto_inc_decr = FALSE;
unknown's avatar
unknown committed
2458

unknown's avatar
unknown committed
2459
	        if (error == DB_DUPLICATE_KEY
2460 2461
		    && (user_thd->lex->sql_command == SQLCOM_REPLACE
			|| user_thd->lex->sql_command
unknown's avatar
unknown committed
2462
			                 == SQLCOM_REPLACE_SELECT)) {
unknown's avatar
unknown committed
2463 2464 2465

		        skip_auto_inc_decr= TRUE;
		}
2466

unknown's avatar
unknown committed
2467 2468
	        if (!skip_auto_inc_decr && incremented_auto_inc_counter
		    && prebuilt->trx->auto_inc_lock) {
unknown's avatar
unknown committed
2469
	                dict_table_autoinc_decrement(prebuilt->table);
2470 2471
	        }

unknown's avatar
unknown committed
2472 2473
		if (!skip_auto_inc_decr && incremented_auto_inc_for_stat
		    && prebuilt->trx->auto_inc_lock) {
2474 2475 2476 2477
		        auto_inc_counter_for_this_stat--;
		}
	}

unknown's avatar
unknown committed
2478
	error = convert_error_code_to_mysql(error, user_thd);
2479

2480
	/* Tell InnoDB server that there might be work for
2481
	utility threads: */
2482
func_exit:
2483
	innobase_active_small();
2484 2485 2486 2487

  	DBUG_RETURN(error);
}

2488
/******************************************************************
2489
Converts field data for storage in an InnoDB update vector. */
2490 2491 2492 2493 2494 2495 2496 2497 2498 2499
inline
mysql_byte*
innobase_convert_and_store_changed_col(
/*===================================*/
				/* out: pointer to the end of the converted
				data in the buffer */
	upd_field_t*	ufield,	/* in/out: field in the update vector */
	mysql_byte*	buf,	/* in: buffer we can use in conversion */
	mysql_byte*	data,	/* in: column data to store */
	ulint		len,	/* in: data len */
2500
	ulint		col_type,/* in: data type in InnoDB type numbers */
2501
	ulint		is_unsigned)/* in: != 0 if an unsigned integer type */
2502
{
2503 2504 2505 2506
	uint	i;

	if (len == UNIV_SQL_NULL) {
		data = NULL;
2507 2508
	} else if (col_type == DATA_VARCHAR || col_type == DATA_BINARY
		   || col_type == DATA_VARMYSQL) {
2509 2510 2511 2512
	        /* Remove trailing spaces */
        	while (len > 0 && data[len - 1] == ' ') {
	                len--;
	        }
2513
	} else if (col_type == DATA_INT) {
2514
		/* Store integer data in InnoDB in a big-endian
2515
		format, sign bit negated, if signed */
2516

2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527
		for (i = 0; i < len; i++) {
			buf[len - 1 - i] = data[i];
		}

		if (!is_unsigned) {
			buf[0] = buf[0] ^ 128;
		}

		data = buf;

		buf += len;
2528
	}
2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546

	ufield->new_val.data = data;
	ufield->new_val.len = len;

	return(buf);
}

/**************************************************************************
Checks which fields have changed in a row and stores information
of them to an update vector. */
static
int
calc_row_difference(
/*================*/
					/* out: error number or 0 */
	upd_t*		uvect,		/* in/out: update vector */
	mysql_byte* 	old_row,	/* in: old row in MySQL format */
	mysql_byte* 	new_row,	/* in: new row in MySQL format */
unknown's avatar
unknown committed
2547 2548
	struct st_table* table,		/* in: table in MySQL data
					dictionary */
2549
	mysql_byte*	upd_buff,	/* in: buffer to use */
unknown's avatar
unknown committed
2550
	ulint		buff_len,	/* in: buffer length */
2551
	row_prebuilt_t*	prebuilt,	/* in: InnoDB prebuilt struct */
2552 2553
	THD*		thd)		/* in: user thread */
{
unknown's avatar
unknown committed
2554
	mysql_byte*	original_upd_buff = upd_buff;
2555
	Field*		field;
2556 2557 2558
	uint		n_fields;
	ulint		o_len;
	ulint		n_len;
unknown's avatar
unknown committed
2559 2560 2561
	byte*	        o_ptr;
        byte*	        n_ptr;
        byte*	        buf;
2562
	upd_field_t*	ufield;
2563 2564
	ulint		col_type;
	ulint		is_unsigned;
2565
	ulint		n_changed = 0;
2566
	uint		i;
2567 2568 2569

	n_fields = table->fields;

2570
	/* We use upd_buff to convert changed fields */
unknown's avatar
unknown committed
2571
	buf = (byte*) upd_buff;
2572

2573 2574 2575
	for (i = 0; i < n_fields; i++) {
		field = table->field[i];

2576
		/* if (thd->query_id != field->query_id) { */
2577 2578
			/* TODO: check that these fields cannot have
			changed! */
2579

2580 2581
		/*	goto skip_field;
		}*/
2582

unknown's avatar
unknown committed
2583 2584
		o_ptr = (byte*) old_row + get_field_offset(table, field);
		n_ptr = (byte*) new_row + get_field_offset(table, field);
2585 2586 2587
		o_len = field->pack_length();
		n_len = field->pack_length();

2588
		col_type = get_innobase_type_from_mysql_type(field);
2589
		is_unsigned = (ulint) (field->flags & UNSIGNED_FLAG);
2590 2591 2592 2593 2594 2595 2596 2597 2598 2599

		switch (col_type) {

		case DATA_BLOB:
			o_ptr = row_mysql_read_blob_ref(&o_len, o_ptr, o_len);
			n_ptr = row_mysql_read_blob_ref(&n_len, n_ptr, n_len);
			break;
		case DATA_VARCHAR:
		case DATA_BINARY:
		case DATA_VARMYSQL:
unknown's avatar
unknown committed
2600 2601 2602 2603
			o_ptr = row_mysql_read_var_ref_noninline(&o_len,
								o_ptr);
			n_ptr = row_mysql_read_var_ref_noninline(&n_len,
								n_ptr);
2604 2605 2606
		default:
			;
		}
2607

2608
		if (field->null_ptr) {
unknown's avatar
unknown committed
2609 2610
			if (field_in_record_is_null(table, field,
							(char*) old_row)) {
2611 2612
				o_len = UNIV_SQL_NULL;
			}
2613

unknown's avatar
unknown committed
2614 2615
			if (field_in_record_is_null(table, field,
							(char*) new_row)) {
2616 2617 2618 2619 2620 2621 2622 2623 2624 2625
				n_len = UNIV_SQL_NULL;
			}
		}

		if (o_len != n_len || (o_len != UNIV_SQL_NULL &&
					0 != memcmp(o_ptr, n_ptr, o_len))) {
			/* The field has changed */

			ufield = uvect->fields + n_changed;

unknown's avatar
unknown committed
2626 2627 2628 2629
			buf = (byte*)
                          innobase_convert_and_store_changed_col(ufield,
					  (mysql_byte*)buf,
					  (mysql_byte*)n_ptr, n_len, col_type,
2630
						is_unsigned);
2631
			ufield->exp = NULL;
2632 2633
			ufield->field_no =
					(prebuilt->table->cols + i)->clust_pos;
2634 2635 2636 2637 2638 2639 2640
			n_changed++;
		}
	}

	uvect->n_fields = n_changed;
	uvect->info_bits = 0;

unknown's avatar
unknown committed
2641 2642
	ut_a(buf <= (byte*)original_upd_buff + buff_len);

2643 2644 2645 2646 2647 2648 2649
	return(0);
}

/**************************************************************************
Updates a row given as a parameter to a new value. Note that we are given
whole rows, not just the fields which are updated: this incurs some
overhead for CPU when we check which fields are actually updated.
2650
TODO: currently InnoDB does not prevent the 'Halloween problem':
2651 2652
in a searched update a single row can get updated several times
if its index columns are updated! */
2653

2654 2655 2656 2657
int
ha_innobase::update_row(
/*====================*/
					/* out: error number or 0 */
2658 2659
	const mysql_byte* 	old_row,/* in: old row in MySQL format */
	mysql_byte* 		new_row)/* in: new row in MySQL format */
2660 2661 2662 2663 2664
{
	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;
	upd_t*		uvect;
	int		error = 0;

2665
	DBUG_ENTER("ha_innobase::update_row");
2666

unknown's avatar
unknown committed
2667 2668
	ut_ad(prebuilt->trx ==
		(trx_t*) current_thd->transaction.all.innobase_tid);
unknown's avatar
unknown committed
2669

2670 2671
        if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_UPDATE)
                table->timestamp_field->set_time();
2672

unknown's avatar
unknown committed
2673 2674 2675
	if (last_query_id != user_thd->query_id) {
	        prebuilt->sql_stat_start = TRUE;
                last_query_id = user_thd->query_id;
unknown's avatar
unknown committed
2676 2677

		innobase_release_stat_resources(prebuilt->trx);
unknown's avatar
unknown committed
2678 2679
	}

2680 2681 2682 2683 2684
	if (prebuilt->upd_node) {
		uvect = prebuilt->upd_node->update;
	} else {
		uvect = row_get_prebuilt_update_vector(prebuilt);
	}
2685 2686 2687 2688

	/* Build an update vector from the modified fields in the rows
	(uses upd_buff of the handle) */

2689
	calc_row_difference(uvect, (mysql_byte*) old_row, new_row, table,
unknown's avatar
unknown committed
2690 2691 2692
			upd_buff, (ulint)upd_and_key_val_buff_len,
			prebuilt, user_thd);

2693 2694 2695
	/* This is not a delete */
	prebuilt->upd_node->is_delete = FALSE;

unknown's avatar
unknown committed
2696
	assert(prebuilt->template_type == ROW_MYSQL_WHOLE_ROW);
2697

unknown's avatar
unknown committed
2698
	innodb_srv_conc_enter_innodb(prebuilt->trx);
unknown's avatar
Merge  
unknown committed
2699

2700
	error = row_update_for_mysql((byte*) old_row, prebuilt);
2701

unknown's avatar
unknown committed
2702
	innodb_srv_conc_exit_innodb(prebuilt->trx);
unknown's avatar
Merge  
unknown committed
2703

unknown's avatar
unknown committed
2704
	error = convert_error_code_to_mysql(error, user_thd);
2705

2706
	/* Tell InnoDB server that there might be work for
2707 2708
	utility threads: */

2709
	innobase_active_small();
2710 2711 2712 2713 2714 2715 2716 2717 2718 2719

	DBUG_RETURN(error);
}

/**************************************************************************
Deletes a row given as the parameter. */

int
ha_innobase::delete_row(
/*====================*/
2720 2721
					/* out: error number or 0 */
	const mysql_byte* record)	/* in: a row in MySQL format */
2722 2723 2724 2725
{
	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;
	int		error = 0;

2726
	DBUG_ENTER("ha_innobase::delete_row");
2727

unknown's avatar
unknown committed
2728 2729
	ut_ad(prebuilt->trx ==
		(trx_t*) current_thd->transaction.all.innobase_tid);
unknown's avatar
unknown committed
2730

unknown's avatar
unknown committed
2731 2732 2733
	if (last_query_id != user_thd->query_id) {
	        prebuilt->sql_stat_start = TRUE;
                last_query_id = user_thd->query_id;
unknown's avatar
unknown committed
2734 2735

		innobase_release_stat_resources(prebuilt->trx);
unknown's avatar
unknown committed
2736 2737
	}

2738 2739 2740
	if (!prebuilt->upd_node) {
		row_get_prebuilt_update_vector(prebuilt);
	}
2741 2742

	/* This is a delete */
2743

2744
	prebuilt->upd_node->is_delete = TRUE;
2745

unknown's avatar
unknown committed
2746
	innodb_srv_conc_enter_innodb(prebuilt->trx);
unknown's avatar
Merge  
unknown committed
2747

2748
	error = row_update_for_mysql((byte*) record, prebuilt);
2749

unknown's avatar
unknown committed
2750
	innodb_srv_conc_exit_innodb(prebuilt->trx);
unknown's avatar
Merge  
unknown committed
2751

unknown's avatar
unknown committed
2752
	error = convert_error_code_to_mysql(error, user_thd);
2753

2754
	/* Tell the InnoDB server that there might be work for
2755 2756
	utility threads: */

2757
	innobase_active_small();
2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773

	DBUG_RETURN(error);
}

/**********************************************************************
Initializes a handle to use an index. All the work is delegated to
change_active_index(). */

int
ha_innobase::index_init(
/*====================*/
			/* out: 0 or error number */
	uint 	keynr)	/* in: key (index) number */
{
	int	status;

	DBUG_ENTER("index_init");

	status = change_active_index(keynr);

	DBUG_RETURN(status);
}

/**********************************************************************
2780
Currently does nothing. */
2781 2782 2783 2784 2785 2786 2787

int
ha_innobase::index_end(void)
/*========================*/
{
	int 	error	= 0;
  	DBUG_ENTER("index_end");
unknown's avatar
unknown committed
2788
        active_index=MAX_KEY;
2789 2790 2791 2792 2793
  	DBUG_RETURN(error);
}

/*************************************************************************
Converts a search mode flag understood by MySQL to a flag understood
2794
by InnoDB. */
2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808
inline
ulint
convert_search_mode_to_innobase(
/*============================*/
	enum ha_rkey_function	find_flag)
{
	switch (find_flag) {
  		case HA_READ_KEY_EXACT:		return(PAGE_CUR_GE);
  			/* the above does not require the index to be UNIQUE */
  		case HA_READ_KEY_OR_NEXT:	return(PAGE_CUR_GE);
		case HA_READ_KEY_OR_PREV:	return(PAGE_CUR_LE);
		case HA_READ_AFTER_KEY:		return(PAGE_CUR_G);
		case HA_READ_BEFORE_KEY:	return(PAGE_CUR_L);
		case HA_READ_PREFIX:		return(PAGE_CUR_GE);
unknown's avatar
unknown committed
2809
	        case HA_READ_PREFIX_LAST:       return(PAGE_CUR_LE);
unknown's avatar
unknown committed
2810 2811 2812
                case HA_READ_PREFIX_LAST_OR_PREV:return(PAGE_CUR_LE);
		  /* In MySQL-4.0 HA_READ_PREFIX and HA_READ_PREFIX_LAST always
		  pass a complete-field prefix of a key value as the search
unknown's avatar
unknown committed
2813 2814 2815 2816 2817
		  tuple. I.e., it is not allowed that the last field would
		  just contain n first bytes of the full field value.
		  MySQL uses a 'padding' trick to convert LIKE 'abc%'
		  type queries so that it can use as a search tuple
		  a complete-field-prefix of a key value. Thus, the InnoDB
unknown's avatar
unknown committed
2818 2819 2820 2821 2822 2823 2824
		  search mode PAGE_CUR_LE_OR_EXTENDS is never used.
		  TODO: when/if MySQL starts to use also partial-field
		  prefixes, we have to deal with stripping of spaces
		  and comparison of non-latin1 char type fields in
		  innobase_mysql_cmp() to get PAGE_CUR_LE_OR_EXTENDS to
		  work correctly. */

2825 2826 2827 2828 2829
		default:			assert(0);
	}

	return(0);
}
2830

unknown's avatar
unknown committed
2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879
/*
   BACKGROUND INFO: HOW A SELECT SQL QUERY IS EXECUTED
   ---------------------------------------------------
The following does not cover all the details, but explains how we determine
the start of a new SQL statement, and what is associated with it.

For each table in the database the MySQL interpreter may have several
table handle instances in use, also in a single SQL query. For each table
handle instance there is an InnoDB  'prebuilt' struct which contains most
of the InnoDB data associated with this table handle instance.

  A) if the user has not explicitly set any MySQL table level locks:

  1) MySQL calls ::external_lock to set an 'intention' table level lock on
the table of the handle instance. There we set
prebuilt->sql_stat_start = TRUE. The flag sql_stat_start should be set
true if we are taking this table handle instance to use in a new SQL
statement issued by the user. We also increment trx->n_mysql_tables_in_use.

  2) If prebuilt->sql_stat_start == TRUE we 'pre-compile' the MySQL search
instructions to prebuilt->template of the table handle instance in
::index_read. The template is used to save CPU time in large joins.

  3) In row_search_for_mysql, if prebuilt->sql_stat_start is true, we
allocate a new consistent read view for the trx if it does not yet have one,
or in the case of a locking read, set an InnoDB 'intention' table level
lock on the table.

  4) We do the SELECT. MySQL may repeatedly call ::index_read for the
same table handle instance, if it is a join.

  5) When the SELECT ends, MySQL removes its intention table level locks
in ::external_lock. When trx->n_mysql_tables_in_use drops to zero,
 (a) we execute a COMMIT there if the autocommit is on,
 (b) we also release possible 'SQL statement level resources' InnoDB may
have for this SQL statement. The MySQL interpreter does NOT execute
autocommit for pure read transactions, though it should. That is why the
table handler in that case has to execute the COMMIT in ::external_lock.

  B) If the user has explicitly set MySQL table level locks, then MySQL
does NOT call ::external_lock at the start of the statement. To determine
whether we are at the start of a new SQL statement, at the start of
::index_read we also compare the query id to the latest query id where the
table handle instance was used. If it has changed, we know we are at the
start of a new SQL statement. Since the query id can theoretically
wrap around, we use this test only as a secondary way of determining the
start of a new SQL statement. */


2880 2881 2882 2883 2884 2885 2886 2887 2888
/**************************************************************************
Positions an index cursor to the index specified in the handle. Fetches the
row if any. */

int
ha_innobase::index_read(
/*====================*/
					/* out: 0, HA_ERR_KEY_NOT_FOUND,
					or error number */
2889
	mysql_byte*		buf,	/* in/out: buffer for the returned
2890
					row */
2891
	const mysql_byte* 	key_ptr,/* in: key value; if this is NULL
2892
					we position the cursor at the
unknown's avatar
unknown committed
2893 2894 2895
					start or end of index; this can
					also contain an InnoDB row id, in
					which case key_len is the InnoDB
unknown's avatar
unknown committed
2896 2897 2898 2899
					row id length; the key value can
					also be a prefix of a full key value,
					and the last column can be a prefix
					of a full column */
2900
	uint			key_len,/* in: key value length */
2901 2902 2903 2904 2905 2906 2907 2908 2909 2910
	enum ha_rkey_function find_flag)/* in: search flags from my_base.h */
{
	row_prebuilt_t* prebuilt	= (row_prebuilt_t*) innobase_prebuilt;
	ulint		mode;
	dict_index_t*	index;
	ulint		match_mode 	= 0;
	int 		error;
	ulint		ret;

  	DBUG_ENTER("index_read");
unknown's avatar
unknown committed
2911

unknown's avatar
unknown committed
2912 2913
	ut_ad(prebuilt->trx ==
		(trx_t*) current_thd->transaction.all.innobase_tid);
unknown's avatar
unknown committed
2914

2915
  	statistic_increment(ha_read_key_count, &LOCK_status);
2916

unknown's avatar
unknown committed
2917 2918 2919
	if (last_query_id != user_thd->query_id) {
	        prebuilt->sql_stat_start = TRUE;
                last_query_id = user_thd->query_id;
unknown's avatar
unknown committed
2920 2921

		innobase_release_stat_resources(prebuilt->trx);
unknown's avatar
unknown committed
2922 2923
	}

2924
	index = prebuilt->index;
2925

unknown's avatar
unknown committed
2926 2927
	/* Note that if the index for which the search template is built is not
        necessarily prebuilt->index, but can also be the clustered index */
2928

2929 2930 2931 2932
	if (prebuilt->sql_stat_start) {
		build_template(prebuilt, user_thd, table,
							ROW_MYSQL_REC_FIELDS);
	}
2933 2934

	if (key_ptr) {
unknown's avatar
unknown committed
2935 2936 2937
	        /* Convert the search key value to InnoDB format into
		prebuilt->search_tuple */

2938
		row_sel_convert_mysql_key_to_innobase(prebuilt->search_tuple,
unknown's avatar
unknown committed
2939 2940 2941 2942
					(byte*) key_val_buff,
					(ulint)upd_and_key_val_buff_len,
					index,
					(byte*) key_ptr,
2943
					(ulint) key_len, prebuilt->trx);
2944 2945 2946 2947 2948 2949
	} else {
		/* We position the cursor to the last or the first entry
		in the index */

 		dtuple_set_n_fields(prebuilt->search_tuple, 0);
	}
2950

2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964
	mode = convert_search_mode_to_innobase(find_flag);

	match_mode = 0;

	if (find_flag == HA_READ_KEY_EXACT) {
		match_mode = ROW_SEL_EXACT;

	} else if (find_flag == HA_READ_PREFIX
				|| find_flag == HA_READ_PREFIX_LAST) {
		match_mode = ROW_SEL_EXACT_PREFIX;
	}

	last_match_mode = match_mode;

unknown's avatar
unknown committed
2965
	innodb_srv_conc_enter_innodb(prebuilt->trx);
unknown's avatar
Merge  
unknown committed
2966

unknown's avatar
unknown committed
2967
	ret = row_search_for_mysql((byte*) buf, mode, prebuilt, match_mode, 0);
2968

unknown's avatar
unknown committed
2969
	innodb_srv_conc_exit_innodb(prebuilt->trx);
unknown's avatar
Merge  
unknown committed
2970

2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982
	if (ret == DB_SUCCESS) {
		error = 0;
		table->status = 0;

	} else if (ret == DB_RECORD_NOT_FOUND) {
		error = HA_ERR_KEY_NOT_FOUND;
		table->status = STATUS_NOT_FOUND;

	} else if (ret == DB_END_OF_INDEX) {
		error = HA_ERR_KEY_NOT_FOUND;
		table->status = STATUS_NOT_FOUND;
	} else {
unknown's avatar
unknown committed
2983
		error = convert_error_code_to_mysql(ret, user_thd);
2984 2985
		table->status = STATUS_NOT_FOUND;
	}
2986

2987 2988 2989
	DBUG_RETURN(error);
}

unknown's avatar
unknown committed
2990 2991 2992
/***********************************************************************
The following functions works like index_read, but it find the last
row with the current key value or prefix. */
2993 2994

int
unknown's avatar
unknown committed
2995 2996 2997 2998 2999 3000 3001 3002 3003
ha_innobase::index_read_last(
/*=========================*/
			           /* out: 0, HA_ERR_KEY_NOT_FOUND, or an
				   error code */
        mysql_byte*       buf,     /* out: fetched row */
        const mysql_byte* key_ptr, /* in: key value, or a prefix of a full
				   key value */
	uint              key_len) /* in: length of the key val or prefix
				   in bytes */
3004
{
unknown's avatar
unknown committed
3005
        return(index_read(buf, key_ptr, key_len, HA_READ_PREFIX_LAST));
3006 3007
}

3008
/************************************************************************
unknown's avatar
unknown committed
3009
Changes the active index of a handle. */
3010 3011 3012 3013

int
ha_innobase::change_active_index(
/*=============================*/
3014 3015 3016
			/* out: 0 or error code */
	uint 	keynr)	/* in: use this index; MAX_KEY means always clustered
			index, even if it was internally generated by
3017
			InnoDB */
3018
{
unknown's avatar
unknown committed
3019 3020 3021 3022
	row_prebuilt_t* prebuilt	= (row_prebuilt_t*) innobase_prebuilt;
	KEY*		key=0;
	statistic_increment(ha_read_key_count, &LOCK_status);
	DBUG_ENTER("change_active_index");
3023

unknown's avatar
unknown committed
3024 3025 3026
	ut_ad(user_thd == current_thd);
	ut_ad(prebuilt->trx ==
	     (trx_t*) current_thd->transaction.all.innobase_tid);
unknown's avatar
unknown committed
3027

unknown's avatar
unknown committed
3028
	active_index = keynr;
3029

unknown's avatar
unknown committed
3030 3031
	if (keynr != MAX_KEY && table->keys > 0) {
		key = table->key_info + active_index;
3032

unknown's avatar
unknown committed
3033
		prebuilt->index = dict_table_get_index_noninline(
unknown's avatar
unknown committed
3034 3035
						     prebuilt->table,
						     key->name);
unknown's avatar
unknown committed
3036 3037
        } else {
		prebuilt->index = dict_table_get_first_index_noninline(
unknown's avatar
unknown committed
3038
							   prebuilt->table);
unknown's avatar
unknown committed
3039
	}
3040

unknown's avatar
unknown committed
3041 3042 3043 3044 3045 3046
	if (!prebuilt->index) {
	       sql_print_error(
"Innodb could not find key n:o %u with name %s from dict cache for table %s",
	      keynr, key ? key->name : "NULL", prebuilt->table->name);
	      DBUG_RETURN(1);
	}
3047

unknown's avatar
unknown committed
3048
	assert(prebuilt->search_tuple != 0);
unknown's avatar
Merge  
unknown committed
3049

unknown's avatar
unknown committed
3050
	dtuple_set_n_fields(prebuilt->search_tuple, prebuilt->index->n_fields);
3051

unknown's avatar
unknown committed
3052
	dict_index_copy_types(prebuilt->search_tuple, prebuilt->index,
3053
			prebuilt->index->n_fields);
3054

unknown's avatar
unknown committed
3055 3056 3057 3058 3059
	/* MySQL changes the active index for a handle also during some
	queries, for example SELECT MAX(a), SUM(a) first retrieves the MAX()
	and then calculates the sum. Previously we played safe and used
	the flag ROW_MYSQL_WHOLE_ROW below, but that caused unnecessary
	copying. Starting from MySQL-4.1 we use a more efficient flag here. */
3060

unknown's avatar
unknown committed
3061
	build_template(prebuilt, user_thd, table, ROW_MYSQL_REC_FIELDS);
3062

unknown's avatar
unknown committed
3063
	DBUG_RETURN(0);
3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074
}

/**************************************************************************
Positions an index cursor to the index specified in keynr. Fetches the
row if any. */
/* ??? This is only used to read whole keys ??? */

int
ha_innobase::index_read_idx(
/*========================*/
					/* out: error number or 0 */
3075
	mysql_byte*	buf,		/* in/out: buffer for the returned
3076 3077
					row */
	uint 		keynr,		/* in: use this index */
3078
	const mysql_byte* key,		/* in: key value; if this is NULL
3079 3080 3081 3082 3083
					we position the cursor at the
					start or end of index */
	uint		key_len,	/* in: key value length */
	enum ha_rkey_function find_flag)/* in: search flags from my_base.h */
{
unknown's avatar
Merge  
unknown committed
3084 3085 3086 3087
	if (change_active_index(keynr)) {

		return(1);
	}
3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100

	return(index_read(buf, key, key_len, find_flag));
}

/***************************************************************************
Reads the next or previous row from a cursor, which must have previously been
positioned using index_read. */

int
ha_innobase::general_fetch(
/*=======================*/
				/* out: 0, HA_ERR_END_OF_FILE, or error
				number */
3101
	mysql_byte* 	buf,	/* in/out: buffer for next row in MySQL
3102 3103 3104 3105 3106 3107 3108 3109
				format */
	uint 	direction,	/* in: ROW_SEL_NEXT or ROW_SEL_PREV */
	uint	match_mode)	/* in: 0, ROW_SEL_EXACT, or
				ROW_SEL_EXACT_PREFIX */
{
	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;
	ulint		ret;
	int		error	= 0;
3110

3111
	DBUG_ENTER("general_fetch");
3112

unknown's avatar
unknown committed
3113 3114
	ut_ad(prebuilt->trx ==
	     (trx_t*) current_thd->transaction.all.innobase_tid);
unknown's avatar
unknown committed
3115

unknown's avatar
unknown committed
3116
	innodb_srv_conc_enter_innodb(prebuilt->trx);
unknown's avatar
unknown committed
3117

unknown's avatar
Merge  
unknown committed
3118 3119
	ret = row_search_for_mysql((byte*)buf, 0, prebuilt, match_mode,
								direction);
unknown's avatar
unknown committed
3120
	innodb_srv_conc_exit_innodb(prebuilt->trx);
3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133

	if (ret == DB_SUCCESS) {
		error = 0;
		table->status = 0;

	} else if (ret == DB_RECORD_NOT_FOUND) {
		error = HA_ERR_END_OF_FILE;
		table->status = STATUS_NOT_FOUND;

	} else if (ret == DB_END_OF_INDEX) {
		error = HA_ERR_END_OF_FILE;
		table->status = STATUS_NOT_FOUND;
	} else {
unknown's avatar
unknown committed
3134
		error = convert_error_code_to_mysql(ret, user_thd);
3135 3136
		table->status = STATUS_NOT_FOUND;
	}
3137

3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149
	DBUG_RETURN(error);
}

/***************************************************************************
Reads the next row from a cursor, which must have previously been
positioned using index_read. */

int
ha_innobase::index_next(
/*====================*/
				/* out: 0, HA_ERR_END_OF_FILE, or error
				number */
3150
	mysql_byte* 	buf)	/* in/out: buffer for next row in MySQL
3151 3152
				format */
{
3153 3154
  	statistic_increment(ha_read_next_count, &LOCK_status);

3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165
	return(general_fetch(buf, ROW_SEL_NEXT, 0));
}

/***********************************************************************
Reads the next row matching to the key value given as the parameter. */

int
ha_innobase::index_next_same(
/*=========================*/
				/* out: 0, HA_ERR_END_OF_FILE, or error
				number */
3166 3167
	mysql_byte* 	buf,	/* in/out: buffer for the row */
	const mysql_byte* key,	/* in: key value */
3168 3169
	uint 		keylen)	/* in: key value length */
{
3170
  	statistic_increment(ha_read_next_count, &LOCK_status);
3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183

	return(general_fetch(buf, ROW_SEL_NEXT, last_match_mode));
}

/***************************************************************************
Reads the previous row from a cursor, which must have previously been
positioned using index_read. */

int
ha_innobase::index_prev(
/*====================*/
				/* out: 0, HA_ERR_END_OF_FILE, or error
				number */
3184
	mysql_byte* 	buf)	/* in/out: buffer for previous row in MySQL
3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196
				format */
{
	return(general_fetch(buf, ROW_SEL_PREV, 0));
}

/************************************************************************
Positions a cursor on the first record in an index and reads the
corresponding row to buf. */

int
ha_innobase::index_first(
/*=====================*/
3197
				/* out: 0, HA_ERR_END_OF_FILE,
3198 3199
				or error code */
	mysql_byte*	buf)	/* in/out: buffer for the row */
3200 3201 3202 3203 3204 3205 3206 3207
{
	int	error;

  	DBUG_ENTER("index_first");
  	statistic_increment(ha_read_first_count, &LOCK_status);

  	error = index_read(buf, NULL, 0, HA_READ_AFTER_KEY);

3208 3209 3210 3211 3212 3213
        /* MySQL does not seem to allow this to return HA_ERR_KEY_NOT_FOUND */

  	if (error == HA_ERR_KEY_NOT_FOUND) {
  		error = HA_ERR_END_OF_FILE;
  	}

3214 3215 3216 3217 3218 3219 3220 3221 3222 3223
  	DBUG_RETURN(error);
}

/************************************************************************
Positions a cursor on the last record in an index and reads the
corresponding row to buf. */

int
ha_innobase::index_last(
/*====================*/
3224 3225
				/* out: 0, HA_ERR_END_OF_FILE, or error code */
	mysql_byte*	buf)	/* in/out: buffer for the row */
3226 3227 3228
{
	int	error;

3229
  	DBUG_ENTER("index_last");
3230
  	statistic_increment(ha_read_last_count, &LOCK_status);
3231 3232 3233

  	error = index_read(buf, NULL, 0, HA_READ_BEFORE_KEY);

3234
        /* MySQL does not seem to allow this to return HA_ERR_KEY_NOT_FOUND */
3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249

  	if (error == HA_ERR_KEY_NOT_FOUND) {
  		error = HA_ERR_END_OF_FILE;
  	}

  	DBUG_RETURN(error);
}

/********************************************************************
Initialize a table scan. */

int
ha_innobase::rnd_init(
/*==================*/
			/* out: 0 or error number */
3250
	bool	scan)	/* in: ???????? */
3251
{
unknown's avatar
Merge  
unknown committed
3252
	int	err;
unknown's avatar
unknown committed
3253

3254
	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;
3255

unknown's avatar
unknown committed
3256 3257 3258
	/* Store the active index value so that we can restore the original
	value after a scan */

3259
	if (prebuilt->clust_index_was_generated) {
unknown's avatar
Merge  
unknown committed
3260
		err = change_active_index(MAX_KEY);
3261
	} else {
unknown's avatar
Merge  
unknown committed
3262
		err = change_active_index(primary_key);
3263
	}
3264

3265
  	start_of_scan = 1;
3266

unknown's avatar
Merge  
unknown committed
3267
 	return(err);
3268 3269 3270
}

/*********************************************************************
unknown's avatar
unknown committed
3271
Ends a table scan. */
3272 3273 3274 3275 3276 3277

int
ha_innobase::rnd_end(void)
/*======================*/
				/* out: 0 or error number */
{
unknown's avatar
unknown committed
3278
	return(index_end());
3279 3280 3281 3282 3283 3284 3285 3286 3287 3288
}

/*********************************************************************
Reads the next row in a table scan (also used to read the FIRST row
in a table scan). */

int
ha_innobase::rnd_next(
/*==================*/
			/* out: 0, HA_ERR_END_OF_FILE, or error number */
3289
	mysql_byte* buf)/* in/out: returns the row in this buffer,
3290 3291
			in MySQL format */
{
3292
	int	error;
3293 3294 3295 3296

  	DBUG_ENTER("rnd_next");
  	statistic_increment(ha_read_rnd_next_count, &LOCK_status);

3297
  	if (start_of_scan) {
3298 3299 3300 3301
		error = index_first(buf);
		if (error == HA_ERR_KEY_NOT_FOUND) {
			error = HA_ERR_END_OF_FILE;
		}
3302
		start_of_scan = 0;
3303
	} else {
3304
		error = general_fetch(buf, ROW_SEL_NEXT, 0);
3305
	}
3306

3307 3308 3309 3310
  	DBUG_RETURN(error);
}

/**************************************************************************
unknown's avatar
unknown committed
3311
Fetches a row from the table based on a row reference. */
3312

3313 3314 3315
int
ha_innobase::rnd_pos(
/*=================*/
3316 3317 3318
				/* out: 0, HA_ERR_KEY_NOT_FOUND,
				or error code */
	mysql_byte* 	buf,	/* in/out: buffer for the row */
unknown's avatar
unknown committed
3319 3320 3321 3322 3323
	mysql_byte*	pos)	/* in: primary key value of the row in the
				MySQL format, or the row id if the clustered
				index was internally generated by InnoDB;
				the length of data in pos has to be
				ref_length */
3324
{
3325 3326 3327
	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;
	int		error;
	uint		keynr	= active_index;
3328
	DBUG_ENTER("rnd_pos");
unknown's avatar
unknown committed
3329
	DBUG_DUMP("key", (char*) pos, ref_length);
unknown's avatar
unknown committed
3330

3331
	statistic_increment(ha_read_rnd_count, &LOCK_status);
3332

unknown's avatar
unknown committed
3333 3334
	ut_ad(prebuilt->trx ==
		(trx_t*) current_thd->transaction.all.innobase_tid);
unknown's avatar
unknown committed
3335

3336 3337 3338 3339
	if (prebuilt->clust_index_was_generated) {
		/* No primary key was defined for the table and we
		generated the clustered index from the row id: the
		row reference is the row id, not any key value
unknown's avatar
unknown committed
3340
		that MySQL knows of */
3341

unknown's avatar
Merge  
unknown committed
3342
		error = change_active_index(MAX_KEY);
3343
	} else {
unknown's avatar
Merge  
unknown committed
3344
		error = change_active_index(primary_key);
3345
	}
3346

unknown's avatar
Merge  
unknown committed
3347
	if (error) {
unknown's avatar
unknown committed
3348
	        DBUG_PRINT("error",("Got error: %ld",error));
unknown's avatar
Merge  
unknown committed
3349 3350
		DBUG_RETURN(error);
	}
unknown's avatar
unknown committed
3351

unknown's avatar
unknown committed
3352 3353 3354 3355
	/* Note that we assume the length of the row reference is fixed
        for the table, and it is == ref_length */

	error = index_read(buf, pos, ref_length, HA_READ_KEY_EXACT);
unknown's avatar
unknown committed
3356 3357 3358 3359
	if (error)
	{
	  DBUG_PRINT("error",("Got error: %ld",error));
	}
3360
	change_active_index(keynr);
3361

3362 3363 3364 3365
  	DBUG_RETURN(error);
}

/*************************************************************************
3366
Stores a reference to the current row to 'ref' field of the handle. Note
unknown's avatar
unknown committed
3367 3368
that in the case where we have generated the clustered index for the
table, the function parameter is illogical: we MUST ASSUME that 'record'
unknown's avatar
unknown committed
3369
is the current 'position' of the handle, because if row ref is actually
3370
the row id internally generated in InnoDB, then 'record' does not contain
3371 3372
it. We just guess that the row id must be for the record where the handle
was positioned the last time. */
3373 3374 3375 3376

void
ha_innobase::position(
/*==================*/
3377
	const mysql_byte*	record)	/* in: row in MySQL format */
3378
{
3379 3380
	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;
	uint		len;
3381

unknown's avatar
unknown committed
3382 3383
	ut_ad(prebuilt->trx ==
		(trx_t*) current_thd->transaction.all.innobase_tid);
unknown's avatar
unknown committed
3384

3385 3386 3387 3388
	if (prebuilt->clust_index_was_generated) {
		/* No primary key was defined for the table and we
		generated the clustered index from row id: the
		row reference will be the row id, not any key value
unknown's avatar
unknown committed
3389
		that MySQL knows of */
3390 3391 3392 3393 3394

		len = DATA_ROW_ID_LEN;

		memcpy(ref, prebuilt->row_id, len);
	} else {
3395 3396
		len = store_key_val_for_row(primary_key, (char*)ref,
							 ref_length, record);
3397
	}
3398

unknown's avatar
unknown committed
3399 3400 3401
	/* Since we do not store len to the buffer 'ref', we must assume
	that len is always fixed for this table. The following assertion
	checks this. */
unknown's avatar
unknown committed
3402
  
unknown's avatar
unknown committed
3403 3404 3405
	if (len != ref_length) {
	        fprintf(stderr,
	 "InnoDB: Error: stored ref len is %lu, but table ref len is %lu\n",
3406
		  (ulong)len, (ulong)ref_length);
unknown's avatar
unknown committed
3407
	}
3408 3409 3410
}

/*********************************************************************
3411
Creates a table definition to an InnoDB database. */
3412 3413 3414 3415
static
int
create_table_def(
/*=============*/
3416
	trx_t*		trx,		/* in: InnoDB transaction handle */
3417 3418
	TABLE*		form,		/* in: information on table
					columns and indexes */
unknown's avatar
unknown committed
3419 3420 3421 3422 3423 3424 3425 3426 3427
	const char*	table_name,	/* in: table name */
	const char*	path_of_temp_table)/* in: if this is a table explicitly
					created by the user with the
					TEMPORARY keyword, then this
					parameter is the dir path where the
					table should be placed if we create
					an .ibd file for it (no .ibd extension
					in the path, though); otherwise this
					is NULL */
3428 3429 3430 3431 3432 3433
{
	Field*		field;
	dict_table_t*	table;
	ulint		n_cols;
  	int 		error;
  	ulint		col_type;
3434 3435
  	ulint		nulls_allowed;
	ulint		unsigned_type;
unknown's avatar
unknown committed
3436
	ulint		binary_type;
unknown's avatar
unknown committed
3437
	ulint		charset_no;
3438
  	ulint		i;
3439

3440 3441 3442 3443 3444
  	DBUG_ENTER("create_table_def");
  	DBUG_PRINT("enter", ("table_name: %s", table_name));

	n_cols = form->fields;

unknown's avatar
unknown committed
3445 3446
	/* We pass 0 as the space id, and determine at a lower level the space
	id where to store the table */
3447 3448

	table = dict_mem_table_create((char*) table_name, 0, n_cols);
3449

unknown's avatar
unknown committed
3450 3451 3452 3453 3454
	if (path_of_temp_table) {
		table->dir_path_of_temp_table =
			mem_heap_strdup(table->heap, path_of_temp_table);
	}

3455 3456 3457
	for (i = 0; i < n_cols; i++) {
		field = form->field[i];

3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469
		col_type = get_innobase_type_from_mysql_type(field);
		if (field->null_ptr) {
			nulls_allowed = 0;
		} else {
			nulls_allowed = DATA_NOT_NULL;
		}

		if (field->flags & UNSIGNED_FLAG) {
			unsigned_type = DATA_UNSIGNED;
		} else {
			unsigned_type = 0;
		}
3470

unknown's avatar
unknown committed
3471
		if (field->binary()) {
unknown's avatar
unknown committed
3472 3473 3474 3475 3476
			binary_type = DATA_BINARY_TYPE;
		} else {
			binary_type = 0;
		}

unknown's avatar
unknown committed
3477 3478 3479 3480 3481 3482 3483 3484 3485 3486
		charset_no = 0;	

		if (dtype_is_string_type(col_type)) {

			charset_no = (ulint)field->charset()->number;

			ut_a(charset_no < 256); /* in ut0type.h we assume that
						the number fits in one byte */
		}

3487
		dict_mem_table_add_col(table, (char*) field->field_name,
unknown's avatar
unknown committed
3488 3489
					col_type, dtype_form_prtype( 
					(ulint)field->type()
unknown's avatar
unknown committed
3490
					| nulls_allowed | unsigned_type
3491
					| binary_type,
unknown's avatar
unknown committed
3492
					+ charset_no),
3493 3494 3495 3496 3497
					field->pack_length(), 0);
	}

	error = row_create_table_for_mysql(table, trx);

unknown's avatar
unknown committed
3498
	error = convert_error_code_to_mysql(error, NULL);
3499 3500 3501 3502 3503

	DBUG_RETURN(error);
}

/*********************************************************************
3504
Creates an index in an InnoDB database. */
3505 3506
static
int
3507 3508
create_index(
/*=========*/
3509
	trx_t*		trx,		/* in: InnoDB transaction handle */
3510 3511 3512 3513 3514
	TABLE*		form,		/* in: information on table
					columns and indexes */
	const char*	table_name,	/* in: table name */
	uint		key_num)	/* in: index number */
{
unknown's avatar
unknown committed
3515
	Field*		field;
3516
	dict_index_t*	index;
3517
  	int 		error;
3518 3519 3520 3521
	ulint		n_fields;
	KEY*		key;
	KEY_PART_INFO*	key_part;
	ulint		ind_type;
unknown's avatar
unknown committed
3522 3523
	ulint		col_type;
	ulint		prefix_len;
3524
  	ulint		i;
unknown's avatar
unknown committed
3525
  	ulint		j;
3526

3527
  	DBUG_ENTER("create_index");
3528

3529 3530 3531
	key = form->key_info + key_num;

    	n_fields = key->key_parts;
3532

3533 3534
    	ind_type = 0;

unknown's avatar
unknown committed
3535
    	if (key_num == form->primary_key) {
3536 3537
		ind_type = ind_type | DICT_CLUSTERED;
	}
3538

3539 3540 3541 3542
	if (key->flags & HA_NOSAME ) {
		ind_type = ind_type | DICT_UNIQUE;
	}

unknown's avatar
unknown committed
3543 3544
	/* We pass 0 as the space id, and determine at a lower level the space
	id where to store the table */
3545 3546 3547 3548 3549 3550

	index = dict_mem_index_create((char*) table_name, key->name, 0,
						ind_type, n_fields);
	for (i = 0; i < n_fields; i++) {
		key_part = key->key_part + i;

3551
		/* (The flag HA_PART_KEY_SEG denotes in MySQL a column prefix
unknown's avatar
unknown committed
3552 3553 3554 3555 3556 3557 3558 3559 3560 3561
		field in an index: we only store a specified number of first
		bytes of the column to the index field.) The flag does not
		seem to be properly set by MySQL. Let us fall back on testing
		the length of the key part versus the column. */
		
		field = NULL;
		for (j = 0; j < form->fields; j++) {

			field = form->field[j];

3562
			if (0 == ut_cmp_in_lower_case(
unknown's avatar
unknown committed
3563
					(char*)field->field_name,
3564
					(char*)key_part->field->field_name)) {
unknown's avatar
unknown committed
3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577
				/* Found the corresponding column */

				break;
			}
		}

		ut_a(j < form->fields);

		col_type = get_innobase_type_from_mysql_type(key_part->field);

		if (DATA_BLOB == col_type
		    || key_part->length < field->pack_length()) {

unknown's avatar
unknown committed
3578 3579 3580 3581 3582 3583 3584 3585
		        prefix_len = key_part->length;

			if (col_type == DATA_INT
			    || col_type == DATA_FLOAT
			    || col_type == DATA_DOUBLE
			    || col_type == DATA_DECIMAL) {
			        fprintf(stderr,
"InnoDB: error: MySQL is trying to create a column prefix index field\n"
unknown's avatar
unknown committed
3586 3587
"InnoDB: on an inappropriate data type. Table name %s, column name %s.\n",
				  table_name, key_part->field->field_name);
unknown's avatar
unknown committed
3588 3589 3590 3591 3592
			        
			        prefix_len = 0;
			}
		} else {
		        prefix_len = 0;
unknown's avatar
unknown committed
3593 3594
		}

3595 3596
		/* We assume all fields should be sorted in ascending
		order, hence the '0': */
unknown's avatar
unknown committed
3597

3598
		dict_mem_index_add_field(index,
unknown's avatar
unknown committed
3599 3600
				(char*) key_part->field->field_name,
				0, prefix_len);
3601 3602 3603 3604
	}

	error = row_create_index_for_mysql(index, trx);

unknown's avatar
unknown committed
3605
	error = convert_error_code_to_mysql(error, NULL);
3606 3607 3608 3609 3610

	DBUG_RETURN(error);
}

/*********************************************************************
3611
Creates an index to an InnoDB table when the user has defined no
3612
primary index. */
3613 3614
static
int
3615 3616
create_clustered_index_when_no_primary(
/*===================================*/
3617
	trx_t*		trx,		/* in: InnoDB transaction handle */
3618 3619 3620
	const char*	table_name)	/* in: table name */
{
	dict_index_t*	index;
3621 3622
  	int 		error;

unknown's avatar
unknown committed
3623 3624
	/* We pass 0 as the space id, and determine at a lower level the space
	id where to store the table */
3625

unknown's avatar
unknown committed
3626 3627 3628
	index = dict_mem_index_create((char*) table_name,
				      (char*) "GEN_CLUST_INDEX",
				      0, DICT_CLUSTERED, 0);
3629 3630
	error = row_create_index_for_mysql(index, trx);

unknown's avatar
unknown committed
3631
	error = convert_error_code_to_mysql(error, NULL);
3632

3633
	return(error);
3634 3635 3636
}

/*********************************************************************
3637
Creates a new table to an InnoDB database. */
3638 3639 3640 3641 3642 3643 3644 3645

int
ha_innobase::create(
/*================*/
					/* out: error number */
	const char*	name,		/* in: table name */
	TABLE*		form,		/* in: information on table
					columns and indexes */
3646 3647 3648
	HA_CREATE_INFO*	create_info)	/* in: more information of the
					created table, contains also the
					create statement string */
3649 3650 3651
{
	int		error;
	dict_table_t*	innobase_table;
unknown's avatar
unknown committed
3652
	trx_t*		parent_trx;
3653
	trx_t*		trx;
unknown's avatar
unknown committed
3654
	int		primary_key_no;
3655
	uint		i;
unknown's avatar
unknown committed
3656 3657
	char		name2[FN_REFLEN];
	char		norm_name[FN_REFLEN];
unknown's avatar
unknown committed
3658
	THD		*thd= current_thd;
3659

3660 3661
  	DBUG_ENTER("ha_innobase::create");

unknown's avatar
unknown committed
3662
	DBUG_ASSERT(thd != NULL);
unknown's avatar
unknown committed
3663

unknown's avatar
unknown committed
3664 3665 3666 3667
	if (form->fields > 1000) {
		/* The limit probably should be REC_MAX_N_FIELDS - 3 = 1020,
		but we play safe here */

unknown's avatar
unknown committed
3668
	        DBUG_RETURN(HA_ERR_TO_BIG_ROW);
unknown's avatar
unknown committed
3669 3670
	} 

unknown's avatar
unknown committed
3671 3672 3673
	/* Get the transaction associated with the current thd, or create one
	if not yet created */
	
unknown's avatar
unknown committed
3674
	parent_trx = check_trx_exists(current_thd);
unknown's avatar
unknown committed
3675 3676 3677 3678 3679 3680

	/* In case MySQL calls this in the middle of a SELECT query, release
	possible adaptive hash latch to avoid deadlocks of threads */

	trx_search_latch_release_if_reserved(parent_trx);	
	
3681
	trx = trx_allocate_for_mysql();
unknown's avatar
unknown committed
3682 3683 3684
		
	trx->mysql_thd = thd;
	trx->mysql_query_str = &((*thd).query);
3685

unknown's avatar
unknown committed
3686
	if (thd->options & OPTION_NO_FOREIGN_KEY_CHECKS) {
unknown's avatar
unknown committed
3687 3688 3689
		trx->check_foreigns = FALSE;
	}

unknown's avatar
unknown committed
3690
	if (thd->options & OPTION_RELAXED_UNIQUE_CHECKS) {
unknown's avatar
unknown committed
3691 3692 3693
		trx->check_unique_secondary = FALSE;
	}

unknown's avatar
unknown committed
3694 3695 3696 3697 3698
	if (lower_case_table_names) {
		srv_lower_case_table_names = TRUE;
	} else {
		srv_lower_case_table_names = FALSE;
	}
unknown's avatar
unknown committed
3699

unknown's avatar
unknown committed
3700
	fn_format(name2, name, "", "", 2);	// Remove the .frm extension
3701 3702

	normalize_table_name(norm_name, name2);
3703

unknown's avatar
unknown committed
3704
	/* Latch the InnoDB data dictionary exclusively so that no deadlocks
unknown's avatar
unknown committed
3705
	or lock waits can happen in it during a table create operation.
unknown's avatar
unknown committed
3706
	Drop table etc. do this latching in row0mysql.c. */
unknown's avatar
unknown committed
3707

unknown's avatar
unknown committed
3708
	row_mysql_lock_data_dictionary(trx);
unknown's avatar
unknown committed
3709 3710

	/* Create the table definition in InnoDB */
3711

unknown's avatar
unknown committed
3712 3713 3714 3715 3716 3717 3718
	if (create_info->options & HA_LEX_CREATE_TMP_TABLE) {

  		error = create_table_def(trx, form, norm_name, name2);
	} else {
		error = create_table_def(trx, form, norm_name, NULL);
	}

unknown's avatar
unknown committed
3719
  	if (error) {
unknown's avatar
unknown committed
3720
		innobase_commit_low(trx);
3721

unknown's avatar
unknown committed
3722
		row_mysql_unlock_data_dictionary(trx);
3723 3724 3725 3726 3727 3728

  		trx_free_for_mysql(trx);

 		DBUG_RETURN(error);
 	}

3729 3730
	/* Look for a primary key */

unknown's avatar
unknown committed
3731 3732 3733
	primary_key_no= (table->primary_key != MAX_KEY ?
			 (int) table->primary_key : 
			 -1);
3734

3735 3736 3737
	/* Our function row_get_mysql_key_number_for_index assumes
	the primary key is always number 0, if it exists */

unknown's avatar
unknown committed
3738
	DBUG_ASSERT(primary_key_no == -1 || primary_key_no == 0);
3739

3740 3741
	/* Create the keys */

3742 3743 3744
	if (form->keys == 0 || primary_key_no == -1) {
		/* Create an index which is used as the clustered index;
		order the rows by their row id which is internally generated
3745
		by InnoDB */
3746

3747
		error = create_clustered_index_when_no_primary(trx,
3748
							norm_name);
3749
  		if (error) {
unknown's avatar
unknown committed
3750 3751
			innobase_commit_low(trx);

unknown's avatar
unknown committed
3752
			row_mysql_unlock_data_dictionary(trx);
3753

3754 3755 3756 3757
			trx_free_for_mysql(trx);

			DBUG_RETURN(error);
      		}
3758 3759 3760
	}

	if (primary_key_no != -1) {
3761
		/* In InnoDB the clustered index must always be created
3762
		first */
unknown's avatar
unknown committed
3763 3764
	    	if ((error = create_index(trx, form, norm_name,
					  (uint) primary_key_no))) {
unknown's avatar
unknown committed
3765 3766
			innobase_commit_low(trx);

unknown's avatar
unknown committed
3767
			row_mysql_unlock_data_dictionary(trx);
3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778

  			trx_free_for_mysql(trx);

			DBUG_RETURN(error);
      		}
      	}

	for (i = 0; i < form->keys; i++) {

		if (i != (uint) primary_key_no) {

unknown's avatar
unknown committed
3779
    			if ((error = create_index(trx, form, norm_name, i))) {
3780

unknown's avatar
unknown committed
3781
			  	innobase_commit_low(trx);
3782

unknown's avatar
unknown committed
3783
				row_mysql_unlock_data_dictionary(trx);
3784 3785 3786 3787 3788

  				trx_free_for_mysql(trx);

				DBUG_RETURN(error);
      			}
3789
      		}
3790
  	}
3791

unknown's avatar
unknown committed
3792
	if (current_thd->query != NULL) {
unknown's avatar
unknown committed
3793
		LEX_STRING q;
unknown's avatar
unknown committed
3794

unknown's avatar
unknown committed
3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805
		if (thd->convert_string(&q, system_charset_info,
					current_thd->query,
					current_thd->query_length,
					current_thd->charset())) {
			error = HA_ERR_OUT_OF_MEM;
		} else {
			error = row_table_add_foreign_constraints(trx,
					q.str, norm_name);

			error = convert_error_code_to_mysql(error, NULL);
		}
3806

3807 3808
		if (error) {
			innobase_commit_low(trx);
unknown's avatar
unknown committed
3809

3810
			row_mysql_unlock_data_dictionary(trx);
3811

3812
  			trx_free_for_mysql(trx);
3813

3814 3815
			DBUG_RETURN(error);
		}
3816 3817
	}

unknown's avatar
unknown committed
3818 3819
  	innobase_commit_low(trx);

unknown's avatar
unknown committed
3820
	row_mysql_unlock_data_dictionary(trx);
3821

unknown's avatar
Merge  
unknown committed
3822 3823 3824
	/* Flush the log to reduce probability that the .frm files and
	the InnoDB data dictionary get out-of-sync if the user runs
	with innodb_flush_log_at_trx_commit = 0 */
unknown's avatar
unknown committed
3825

unknown's avatar
unknown committed
3826
	log_buffer_flush_to_disk();
unknown's avatar
Merge  
unknown committed
3827

3828
	innobase_table = dict_table_get(norm_name, NULL);
3829

unknown's avatar
unknown committed
3830
	DBUG_ASSERT(innobase_table != 0);
3831

3832
	/* Tell the InnoDB server that there might be work for
3833 3834 3835 3836 3837 3838 3839 3840 3841
	utility threads: */

	srv_active_wake_master_thread();

  	trx_free_for_mysql(trx);

	DBUG_RETURN(0);
}

unknown's avatar
unknown committed
3842 3843 3844 3845 3846 3847 3848 3849 3850 3851
/*********************************************************************
Discards or imports the tablespace of the InnoDB table of this handle,
using the transaction stored in the prebuilt struct. */

int
ha_innobase::discard_or_import_tablespace(
/*======================================*/
				/* out: 0 == success, -1 == error */
	my_bool discard)	/* in: TRUE if discard, else import */
{
	row_prebuilt_t* prebuilt	= (row_prebuilt_t*) innobase_prebuilt;
	dict_table_t*	ib_table;
	trx_t*		trx;
	int		err;

 	DBUG_ENTER("ha_innobase::discard_or_import_tablespace");

	/* The handle must own a valid transaction, and it must be the
	transaction of the current thread */

	ut_a(prebuilt->trx && prebuilt->trx->magic_n == TRX_MAGIC_N);
	ut_a(prebuilt->trx ==
		(trx_t*) current_thd->transaction.all.innobase_tid);

	trx = prebuilt->trx;
	ib_table = prebuilt->table;

	err = discard
		? row_discard_tablespace_for_mysql(ib_table->name, trx)
		: row_import_tablespace_for_mysql(ib_table->name, trx);

	/* Any InnoDB error is reported to the caller simply as -1 */

	DBUG_RETURN(err == DB_SUCCESS ? 0 : -1);
}

3878
/*********************************************************************
3879
Drops a table from an InnoDB database. Before calling this function,
unknown's avatar
unknown committed
3880 3881
MySQL calls innobase_commit to commit the transaction of the current user.
Then the current user cannot have locks set on the table. Drop table
3882 3883
operation inside InnoDB will remove all locks any user has on the table
inside InnoDB. */
3884 3885 3886 3887

int
ha_innobase::delete_table(
/*======================*/
unknown's avatar
unknown committed
3888 3889
				/* out: error number */
	const char*	name)	/* in: table name */
3890 3891 3892
{
	ulint	name_len;
	int	error;
unknown's avatar
unknown committed
3893
	trx_t*	parent_trx;
3894
	trx_t*	trx;
unknown's avatar
unknown committed
3895
	THD     *thd= current_thd;
3896
	char	norm_name[1000];
unknown's avatar
unknown committed
3897

unknown's avatar
unknown committed
3898
 	DBUG_ENTER("ha_innobase::delete_table");
3899

unknown's avatar
unknown committed
3900 3901 3902
	/* Get the transaction associated with the current thd, or create one
	if not yet created */
	
unknown's avatar
unknown committed
3903
	parent_trx = check_trx_exists(current_thd);
unknown's avatar
unknown committed
3904 3905 3906 3907 3908 3909

	/* In case MySQL calls this in the middle of a SELECT query, release
	possible adaptive hash latch to avoid deadlocks of threads */

	trx_search_latch_release_if_reserved(parent_trx);	

unknown's avatar
unknown committed
3910 3911 3912 3913 3914 3915
	if (lower_case_table_names) {
		srv_lower_case_table_names = TRUE;
	} else {
		srv_lower_case_table_names = FALSE;
	}

3916 3917
	trx = trx_allocate_for_mysql();

unknown's avatar
unknown committed
3918 3919
	trx->mysql_thd = current_thd;
	trx->mysql_query_str = &((*current_thd).query);
unknown's avatar
unknown committed
3920

unknown's avatar
unknown committed
3921 3922 3923 3924 3925 3926 3927 3928
	if (thd->options & OPTION_NO_FOREIGN_KEY_CHECKS) {
		trx->check_foreigns = FALSE;
	}

	if (thd->options & OPTION_RELAXED_UNIQUE_CHECKS) {
		trx->check_unique_secondary = FALSE;
	}

3929 3930 3931
	name_len = strlen(name);

	assert(name_len < 1000);
3932

3933 3934
	/* Strangely, MySQL passes the table name without the '.frm'
	extension, in contrast to ::create */
3935

3936 3937
	normalize_table_name(norm_name, name);

3938
  	/* Drop the table in InnoDB */
3939

3940
	error = row_drop_table_for_mysql(norm_name, trx,
unknown's avatar
unknown committed
3941
		thd->lex->sql_command == SQLCOM_DROP_DB);
3942

unknown's avatar
Merge  
unknown committed
3943 3944 3945
	/* Flush the log to reduce probability that the .frm files and
	the InnoDB data dictionary get out-of-sync if the user runs
	with innodb_flush_log_at_trx_commit = 0 */
unknown's avatar
unknown committed
3946

unknown's avatar
unknown committed
3947
	log_buffer_flush_to_disk();
unknown's avatar
Merge  
unknown committed
3948

3949
	/* Tell the InnoDB server that there might be work for
3950 3951 3952 3953
	utility threads: */

	srv_active_wake_master_thread();

unknown's avatar
unknown committed
3954
  	innobase_commit_low(trx);
unknown's avatar
updated  
unknown committed
3955

3956 3957
  	trx_free_for_mysql(trx);

unknown's avatar
unknown committed
3958
	error = convert_error_code_to_mysql(error, NULL);
3959 3960 3961 3962

	DBUG_RETURN(error);
}

3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975
/*********************************************************************
Removes all tables in the named database inside InnoDB. */

int
innobase_drop_database(
/*===================*/
			/* out: error number */
	char*	path)	/* in: database path; inside InnoDB the name
			of the last directory in the path is used as
			the database name: for example, in 'mysql/data/test'
			the database name is 'test' */
{
	ulint	len		= 0;
unknown's avatar
unknown committed
3976
	trx_t*	parent_trx;
3977 3978 3979
	trx_t*	trx;
	char*	ptr;
	int	error;
3980
	char*	namebuf;
unknown's avatar
unknown committed
3981

unknown's avatar
unknown committed
3982 3983 3984
	/* Get the transaction associated with the current thd, or create one
	if not yet created */
	
unknown's avatar
unknown committed
3985
	parent_trx = check_trx_exists(current_thd);
unknown's avatar
unknown committed
3986 3987 3988 3989 3990 3991

	/* In case MySQL calls this in the middle of a SELECT query, release
	possible adaptive hash latch to avoid deadlocks of threads */

	trx_search_latch_release_if_reserved(parent_trx);	

3992
	ptr = strend(path) - 2;
unknown's avatar
unknown committed
3993

3994 3995 3996 3997 3998 3999
	while (ptr >= path && *ptr != '\\' && *ptr != '/') {
		ptr--;
		len++;
	}

	ptr++;
4000
	namebuf = my_malloc(len + 2, MYF(0));
4001 4002 4003 4004

	memcpy(namebuf, ptr, len);
	namebuf[len] = '/';
	namebuf[len + 1] = '\0';
unknown's avatar
unknown committed
4005
#ifdef  __WIN__
4006
	my_casedn_str(system_charset_info, namebuf);
unknown's avatar
unknown committed
4007
#endif
4008
	trx = trx_allocate_for_mysql();
unknown's avatar
unknown committed
4009 4010
	trx->mysql_thd = current_thd;
	trx->mysql_query_str = &((*current_thd).query);
4011

unknown's avatar
unknown committed
4012 4013 4014 4015
	if (current_thd->options & OPTION_NO_FOREIGN_KEY_CHECKS) {
		trx->check_foreigns = FALSE;
	}

4016
  	error = row_drop_database_for_mysql(namebuf, trx);
4017
	my_free(namebuf, MYF(0));
4018

unknown's avatar
Merge  
unknown committed
4019 4020 4021
	/* Flush the log to reduce probability that the .frm files and
	the InnoDB data dictionary get out-of-sync if the user runs
	with innodb_flush_log_at_trx_commit = 0 */
unknown's avatar
unknown committed
4022

unknown's avatar
unknown committed
4023
	log_buffer_flush_to_disk();
unknown's avatar
Merge  
unknown committed
4024

4025 4026 4027 4028 4029
	/* Tell the InnoDB server that there might be work for
	utility threads: */

	srv_active_wake_master_thread();

unknown's avatar
unknown committed
4030
  	innobase_commit_low(trx);
4031 4032
  	trx_free_for_mysql(trx);

unknown's avatar
unknown committed
4033
	error = convert_error_code_to_mysql(error, NULL);
4034 4035 4036 4037

	return(error);
}

4038
/*************************************************************************
4039
Renames an InnoDB table. */
4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050

int
ha_innobase::rename_table(
/*======================*/
				/* out: 0 or error code */
	const char*	from,	/* in: old name of the table */
	const char*	to)	/* in: new name of the table */
{
	ulint	name_len1;
	ulint	name_len2;
	int	error;
unknown's avatar
unknown committed
4051
	trx_t*	parent_trx;
4052
	trx_t*	trx;
4053 4054
	char	norm_from[1000];
	char	norm_to[1000];
4055

4056 4057
  	DBUG_ENTER("ha_innobase::rename_table");

unknown's avatar
unknown committed
4058 4059 4060
	/* Get the transaction associated with the current thd, or create one
	if not yet created */
	
unknown's avatar
unknown committed
4061
	parent_trx = check_trx_exists(current_thd);
unknown's avatar
unknown committed
4062 4063 4064 4065 4066 4067

	/* In case MySQL calls this in the middle of a SELECT query, release
	possible adaptive hash latch to avoid deadlocks of threads */

	trx_search_latch_release_if_reserved(parent_trx);	

unknown's avatar
unknown committed
4068 4069 4070 4071 4072 4073
	if (lower_case_table_names) {
		srv_lower_case_table_names = TRUE;
	} else {
		srv_lower_case_table_names = FALSE;
	}

4074
	trx = trx_allocate_for_mysql();
unknown's avatar
unknown committed
4075 4076
	trx->mysql_thd = current_thd;
	trx->mysql_query_str = &((*current_thd).query);
4077 4078 4079 4080 4081 4082

	name_len1 = strlen(from);
	name_len2 = strlen(to);

	assert(name_len1 < 1000);
	assert(name_len2 < 1000);
4083

4084 4085 4086
	normalize_table_name(norm_from, from);
	normalize_table_name(norm_to, to);

4087
  	/* Rename the table in InnoDB */
4088

4089
  	error = row_rename_table_for_mysql(norm_from, norm_to, trx);
4090

unknown's avatar
Merge  
unknown committed
4091 4092 4093
	/* Flush the log to reduce probability that the .frm files and
	the InnoDB data dictionary get out-of-sync if the user runs
	with innodb_flush_log_at_trx_commit = 0 */
unknown's avatar
unknown committed
4094

unknown's avatar
unknown committed
4095
	log_buffer_flush_to_disk();
unknown's avatar
Merge  
unknown committed
4096

4097
	/* Tell the InnoDB server that there might be work for
4098 4099 4100 4101
	utility threads: */

	srv_active_wake_master_thread();

unknown's avatar
unknown committed
4102
  	innobase_commit_low(trx);
4103 4104
  	trx_free_for_mysql(trx);

unknown's avatar
unknown committed
4105
	error = convert_error_code_to_mysql(error, NULL);
4106 4107 4108 4109 4110 4111 4112 4113 4114 4115

	DBUG_RETURN(error);
}

/*************************************************************************
Estimates the number of index records in a range. The result is never 0:
an estimate of 0 is bumped to 1 (see the comment near the end). */

ha_rows
ha_innobase::records_in_range(
/*==========================*/
						/* out: estimated number of
						rows */
	uint 			keynr,		/* in: index number */
        key_range		*min_key,	/* in: start key value of the
                                                   range, may also be 0 */
	key_range		*max_key)	/* in: range end key val, may
                                                   also be 0 */
{
	row_prebuilt_t* prebuilt	= (row_prebuilt_t*) innobase_prebuilt;
	ulint		buff2_len	= table->reclength
						+ table->max_key_length + 100;
	/* Scratch buffer for the converted end key; the start key uses
	the handle's own key_val_buff */
	mysql_byte*	key_val_buff2 	= (mysql_byte*) my_malloc(buff2_len,
								MYF(MY_WME));
	const mysql_byte* start_key_ptr;
	const mysql_byte* end_key_ptr;
	ulint		start_key_len;
	ulint		end_key_len;
	KEY*		key;
	dict_index_t*	index;
	dtuple_t*	range_start;
	dtuple_t*	range_end;
	ib_longlong	n_rows;
	ulint		mode1;
	ulint		mode2;
	void*           heap1;
	void*           heap2;

	DBUG_ENTER("records_in_range");

	prebuilt->trx->op_info = (char*)"estimating records in index range";

	/* In case MySQL calls this in the middle of a SELECT query, release
	possible adaptive hash latch to avoid deadlocks of threads */

	trx_search_latch_release_if_reserved(prebuilt->trx);

	/* Look up the InnoDB index by the name of the MySQL key */

	active_index = keynr;
	key = table->key_info + active_index;
	index = dict_table_get_index_noninline(prebuilt->table, key->name);

	/* Build one search tuple for each end of the range */

	range_start = dtuple_create_for_mysql(&heap1, key->key_parts);
	dict_index_copy_types(range_start, index, key->key_parts);

	range_end = dtuple_create_for_mysql(&heap2, key->key_parts);
	dict_index_copy_types(range_end, index, key->key_parts);

	/* A missing range end is passed on as a null key of length 0 */

	start_key_ptr = min_key ? min_key->key : (const mysql_byte*) 0;
	start_key_len = (ulint) (min_key ? min_key->length : 0);

	end_key_ptr = max_key ? max_key->key : (const mysql_byte*) 0;
	end_key_len = (ulint) (max_key ? max_key->length : 0);

	row_sel_convert_mysql_key_to_innobase(
				range_start, (byte*) key_val_buff,
				(ulint)upd_and_key_val_buff_len,
				index,
				(byte*) start_key_ptr,
				start_key_len,
				prebuilt->trx);

	row_sel_convert_mysql_key_to_innobase(
				range_end, (byte*) key_val_buff2,
				buff2_len, index,
				(byte*) end_key_ptr,
				end_key_len,
				prebuilt->trx);

	/* A missing range end defaults to the HA_READ_KEY_EXACT mode */

	mode1 = convert_search_mode_to_innobase(min_key ? min_key->flag :
                                                HA_READ_KEY_EXACT);
	mode2 = convert_search_mode_to_innobase(max_key ? max_key->flag :
                                                HA_READ_KEY_EXACT);

	n_rows = btr_estimate_n_rows_in_range(index, range_start,
						mode1, range_end, mode2);

	dtuple_free_for_mysql(heap1);
	dtuple_free_for_mysql(heap2);

	my_free((char*) key_val_buff2, MYF(0));

	prebuilt->trx->op_info = (char*)"";

	/* The MySQL optimizer seems to believe an estimate of 0 rows is
	always accurate and may return the result 'Empty set' based on that.
	The accuracy is not guaranteed, and even if it were, for a locking
	read we should anyway perform the search to set the next-key lock.
	Add 1 to the value to make sure MySQL does not make the assumption! */

	if (n_rows == 0) {
		n_rows = 1;
	}

	DBUG_RETURN((ha_rows) n_rows);
}

4206 4207
/*************************************************************************
Gives an UPPER BOUND to the number of rows in a table. This is used in
unknown's avatar
unknown committed
4208
filesort.cc. */
4209 4210

ha_rows
unknown's avatar
unknown committed
4211
ha_innobase::estimate_rows_upper_bound(void)
4212
/*======================================*/
4213
			/* out: upper bound of rows */
4214 4215
{
	row_prebuilt_t* prebuilt	= (row_prebuilt_t*) innobase_prebuilt;
4216 4217
	dict_index_t*	index;
	ulonglong	estimate;
4218
	ulonglong	local_data_file_length;
unknown's avatar
unknown committed
4219

unknown's avatar
unknown committed
4220
 	DBUG_ENTER("estimate_rows_upper_bound");
4221

unknown's avatar
unknown committed
4222 4223 4224 4225 4226 4227
	/* We do not know if MySQL can call this function before calling
	external_lock(). To be safe, update the thd of the current table
	handle. */

	update_thd(current_thd);

unknown's avatar
unknown committed
4228 4229 4230
	prebuilt->trx->op_info = (char*)
	                         "calculating upper bound for table rows";

unknown's avatar
unknown committed
4231 4232 4233 4234
	/* In case MySQL calls this in the middle of a SELECT query, release
	possible adaptive hash latch to avoid deadlocks of threads */

	trx_search_latch_release_if_reserved(prebuilt->trx);
4235

4236
	index = dict_table_get_first_index_noninline(prebuilt->table);
unknown's avatar
unknown committed
4237

4238
	local_data_file_length = ((ulonglong) index->stat_n_leaf_pages)
4239
    							* UNIV_PAGE_SIZE;
4240

unknown's avatar
unknown committed
4241 4242
	/* Calculate a minimum length for a clustered index record and from
	that an upper bound for the number of rows. Since we only calculate
unknown's avatar
unknown committed
4243 4244
	new statistics in row0mysql.c when a table has grown by a threshold
	factor, we must add a safety factor 2 in front of the formula below. */
unknown's avatar
unknown committed
4245

unknown's avatar
unknown committed
4246 4247
	estimate = 2 * local_data_file_length /
					 dict_index_calc_min_rec_len(index);
unknown's avatar
unknown committed
4248

unknown's avatar
unknown committed
4249 4250
	prebuilt->trx->op_info = (char*)"";

unknown's avatar
unknown committed
4251
	DBUG_RETURN((ha_rows) estimate);
4252 4253
}

4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265
/*************************************************************************
How many seeks it will take to read through the table. This is to be
comparable to the number returned by records_in_range so that we can
decide if we should scan the table or use keys. */

double
ha_innobase::scan_time()
/*====================*/
			/* out: estimated time measured in disk seeks */
{
	row_prebuilt_t* prebuilt	= (row_prebuilt_t*) innobase_prebuilt;
	double		seeks;

	/* Since MySQL seems to favor table scans too much over index
	searches, we pretend that a sequential read takes the same time
	as a random disk read, that is, we do not divide the following
	by 10, which would be physically realistic. */

	seeks = (double) (prebuilt->table->stat_clustered_index_size);

	return(seeks);
}

unknown's avatar
unknown committed
4274 4275 4276
/**********************************************************************
Calculate the time it takes to read a set of ranges through an index
This enables us to optimise reads for clustered indexes. */
unknown's avatar
unknown committed
4277

unknown's avatar
unknown committed
4278 4279 4280 4281 4282 4283 4284
double
ha_innobase::read_time(
/*===================*/
			/* out: estimated time measured in disk seeks */
	uint    index,	/* in: key number */
	uint	ranges,	/* in: how many ranges */
	ha_rows rows)	/* in: estimated number of rows in the ranges */
unknown's avatar
unknown committed
4285
{
unknown's avatar
unknown committed
4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297
	ha_rows total_rows;
	double  time_for_scan;
  
	if (index != table->primary_key)
	  return handler::read_time(index, ranges, rows); // Not clustered

	if (rows <= 2)
	  return (double) rows;

	/* Assume that the read time is proportional to the scan time for all
	rows + at most one seek per range. */

unknown's avatar
unknown committed
4298
	time_for_scan = scan_time();
unknown's avatar
unknown committed
4299

unknown's avatar
unknown committed
4300
	if ((total_rows = estimate_rows_upper_bound()) < rows)
unknown's avatar
unknown committed
4301 4302 4303
	  return time_for_scan;

	return (ranges + (double) rows / (double) total_rows * time_for_scan);
unknown's avatar
unknown committed
4304 4305
}

4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317
/*************************************************************************
Returns statistics information of the table to the MySQL interpreter,
in various fields of the handle object. Which fields are filled in is
selected by the HA_STATUS_... bits set in flag. */

void
ha_innobase::info(
/*==============*/
	uint flag)	/* in: what information MySQL requests */
{
	row_prebuilt_t* prebuilt	= (row_prebuilt_t*) innobase_prebuilt;
	dict_table_t*	ib_table;
	dict_index_t*	index;
	ha_rows		rec_per_key;
	ulong		key_no;
	ulong		part_no;

	DBUG_ENTER("info");

	/* If we are forcing recovery at a high level, we will suppress
	statistics calculation on tables, because that may crash the
	server if an index is badly corrupted. */

	if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE) {

		DBUG_VOID_RETURN;
	}

	/* We do not know if MySQL can call this function before calling
	external_lock(). To be safe, update the thd of the current table
	handle. */

	update_thd(current_thd);

	prebuilt->trx->op_info = (char*)"returning various info to MySQL";

	/* In case MySQL calls this in the middle of a SELECT query, release
	possible adaptive hash latch to avoid deadlocks of threads */

	trx_search_latch_release_if_reserved(prebuilt->trx);

	ib_table = prebuilt->table;

	if (flag & HA_STATUS_TIME) {
		/* In sql_show we call with this flag: update then statistics
		so that they are up-to-date */

		prebuilt->trx->op_info = (char*)"updating table statistics";

		dict_update_statistics(ib_table);

		prebuilt->trx->op_info = (char*)
		                          "returning various info to MySQL";
	}

	if (flag & HA_STATUS_VARIABLE) {
		/* Row count and sizes; the sizes are converted from
		pages to bytes */

		records = (ha_rows)ib_table->stat_n_rows;
		deleted = 0;
		data_file_length = ((ulonglong)
				ib_table->stat_clustered_index_size)
					* UNIV_PAGE_SIZE;
		index_file_length = ((ulonglong)
				ib_table->stat_sum_of_other_index_sizes)
					* UNIV_PAGE_SIZE;
		delete_length = 0;
		check_time = 0;

		/* Guard the division for a table with no rows */

		mean_rec_length = (records == 0)
			? 0
			: (ulong) (data_file_length / records);
	}

	if (flag & HA_STATUS_CONST) {
		index = dict_table_get_first_index_noninline(ib_table);

		if (prebuilt->clust_index_was_generated) {
			/* Skip the first index when the clustered index
			was generated */

			index = dict_table_get_next_index_noninline(index);
		}

		/* Walk the MySQL keys and the InnoDB indexes in step */

		for (key_no = 0; key_no < table->keys; key_no++) {
			KEY*	key_def;

			if (index == NULL) {
				ut_print_timestamp(stderr);
				fprintf(stderr,
"  InnoDB: Error: table %s contains less indexes inside InnoDB\n"
"InnoDB: than are defined in the MySQL .frm file. Have you mixed up\n"
"InnoDB: .frm files from different installations? See section\n"
"InnoDB: 15.1 at http://www.innodb.com/ibman.html\n",
				   ib_table->name);
				break;
			}

			key_def = table->key_info + key_no;

			for (part_no = 0; part_no < key_def->key_parts;
							part_no++) {

				if (part_no + 1 > index->n_uniq) {
					ut_print_timestamp(stderr);
					fprintf(stderr,
"  InnoDB: Error: index %s of %s has %lu columns unique inside InnoDB\n"
"InnoDB: but MySQL is asking statistics for %lu columns. Have you mixed up\n"
"InnoDB: .frm files from different installations? See section\n"
"InnoDB: 15.1 at http://www.innodb.com/ibman.html\n",
						index->name,
						ib_table->name,
						(unsigned long) index->n_uniq,
						part_no + 1);
					break;
				}

				/* Rows per distinct value of the first
				part_no + 1 key columns; fall back to the
				row count when the number of distinct
				values is 0 */

				rec_per_key =
				  (index->stat_n_diff_key_vals[part_no + 1]
								== 0)
				  ? records
				  : (ha_rows)(records /
				    index->stat_n_diff_key_vals[part_no + 1]);

				/* Since MySQL seems to favor table scans
				too much over index searches, we pretend
				index selectivity is 2 times better than
				our estimate: */

				rec_per_key /= 2;

				if (rec_per_key == 0) {
					rec_per_key = 1;
				}

				/* Clamp to the largest ulong value */

				key_def->rec_per_key[part_no] =
				  rec_per_key >= ~(ulong) 0 ? ~(ulong) 0 :
				  rec_per_key;
			}

			index = dict_table_get_next_index_noninline(index);
		}
	}

	if (flag & HA_STATUS_ERRKEY) {
		ut_a(prebuilt->trx && prebuilt->trx->magic_n == TRX_MAGIC_N);

		/* Translate the index stored in the error info of the
		transaction into a MySQL key number */

		errkey = (unsigned int) row_get_mysql_key_number_for_index(
				       (dict_index_t*)
				       trx_get_error_info(prebuilt->trx));
	}

	prebuilt->trx->op_info = (char*)"";

	DBUG_VOID_RETURN;
}

unknown's avatar
unknown committed
4455
/**************************************************************************
unknown's avatar
unknown committed
4456 4457
Updates index cardinalities of the table, based on 8 random dives into
each index tree. This does NOT calculate exact statistics on the table. */
unknown's avatar
unknown committed
4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471

int
ha_innobase::analyze(
/*=================*/			 
					/* out: returns always 0 (success) */
	THD*		thd,		/* in: connection thread handle */
	HA_CHECK_OPT*	check_opt)	/* in: currently ignored */
{
	/* Simply call ::info() with all the flags */
	info(HA_STATUS_TIME | HA_STATUS_CONST | HA_STATUS_VARIABLE);

	return(0);
}

unknown's avatar
unknown committed
4472
/**************************************************************************
4473 4474
This is mapped to "ALTER TABLE tablename TYPE=InnoDB", which rebuilds
the table in MySQL. */
unknown's avatar
unknown committed
4475

unknown's avatar
unknown committed
4476 4477 4478 4479 4480
int
ha_innobase::optimize(
/*==================*/
	THD*		thd,		/* in: connection thread handle */
	HA_CHECK_OPT*	check_opt)	/* in: currently ignored */
unknown's avatar
unknown committed
4481
{
4482
        return(HA_ADMIN_TRY_ALTER);
unknown's avatar
unknown committed
4483 4484
}

unknown's avatar
unknown committed
4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500
/***********************************************************************
Tries to check that an InnoDB table is not corrupted. If corruption is
noticed, prints to stderr information about it. In case of corruption
may also assert a failure and crash the server. */

int
ha_innobase::check(
/*===============*/
					/* out: HA_ADMIN_CORRUPT or
					HA_ADMIN_OK */
	THD* 		thd,		/* in: user thread handle */
	HA_CHECK_OPT* 	check_opt)	/* in: check options, currently
					ignored */
{
	row_prebuilt_t* prebuilt	= (row_prebuilt_t*) innobase_prebuilt;
	ulint		check_result;

	/* Sanity checks: the handle must carry a valid trx, and it must
	be the trx stored in the current thd */

	ut_a(prebuilt->trx && prebuilt->trx->magic_n == TRX_MAGIC_N);
	ut_a(prebuilt->trx ==
		(trx_t*) current_thd->transaction.all.innobase_tid);

	if (prebuilt->mysql_template == NULL) {
		/* Build the template; we will use a dummy template
		in index scans done in checking */

		build_template(prebuilt, NULL, table, ROW_MYSQL_WHOLE_ROW);
	}

	check_result = row_check_table_for_mysql(prebuilt);

	/* Anything other than DB_SUCCESS is reported as corruption */

	return(check_result == DB_SUCCESS
	       ? HA_ADMIN_OK
	       : HA_ADMIN_CORRUPT);
}

4522
/*****************************************************************
unknown's avatar
Merge  
unknown committed
4523 4524 4525
Adds information about free space in the InnoDB tablespace to a table comment
which is printed out when a user calls SHOW TABLE STATUS. Adds also info on
foreign keys. */
4526 4527 4528 4529

char*
ha_innobase::update_table_comment(
/*==============================*/
unknown's avatar
Merge  
unknown committed
4530 4531 4532
				/* out: table comment + InnoDB free space +
				info on foreign keys */
        const char*	comment)/* in: table comment defined by user */
4533
{
4534 4535 4536
	uint	length			= strlen(comment);
	char*				str;
	row_prebuilt_t*	prebuilt	= (row_prebuilt_t*)innobase_prebuilt;
4537

unknown's avatar
unknown committed
4538 4539 4540 4541
	/* We do not know if MySQL can call this function before calling
	external_lock(). To be safe, update the thd of the current table
	handle. */

4542 4543 4544 4545
	if(length > 64000 - 3) {
		return((char*)comment); /* string too long */
	}

unknown's avatar
unknown committed
4546 4547
	update_thd(current_thd);

unknown's avatar
unknown committed
4548 4549
	prebuilt->trx->op_info = (char*)"returning table comment";

unknown's avatar
unknown committed
4550 4551 4552 4553
	/* In case MySQL calls this in the middle of a SELECT query, release
	possible adaptive hash latch to avoid deadlocks of threads */

	trx_search_latch_release_if_reserved(prebuilt->trx);
4554
	str = NULL;
unknown's avatar
unknown committed
4555

4556
	if (FILE* file = os_file_create_tmpfile()) {
4557
		long	flen;
unknown's avatar
Merge  
unknown committed
4558

4559 4560
		/* output the data to a temporary file */
		fprintf(file, "InnoDB free: %lu kB",
unknown's avatar
unknown committed
4561 4562 4563
      		   (ulong) fsp_get_available_space_in_free_extents(
      					prebuilt->table->space));

4564 4565
		dict_print_info_on_foreign_keys(FALSE, file,
				prebuilt->trx, prebuilt->table);
4566 4567 4568 4569
		flen = ftell(file);
		if(length + flen + 3 > 64000) {
			flen = 64000 - 3 - length;
		}
4570

4571
		ut_ad(flen > 0);
4572

4573 4574
		/* allocate buffer for the full string, and
		read the contents of the temporary file */
4575

4576
		str = my_malloc(length + flen + 3, MYF(0));
4577

4578 4579 4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590
		if (str) {
			char* pos	= str + length;
			if(length) {
				memcpy(str, comment, length);
				*pos++ = ';';
				*pos++ = ' ';
			}
			rewind(file);
			flen = fread(pos, 1, flen, file);
			pos[flen] = 0;
		}

		fclose(file);
unknown's avatar
unknown committed
4591
	}
unknown's avatar
unknown committed
4592

unknown's avatar
unknown committed
4593 4594
        prebuilt->trx->op_info = (char*)"";

4595
  	return(str ? str : (char*) comment);
4596 4597
}

unknown's avatar
unknown committed
4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608
/***********************************************************************
Gets the foreign key create info for a table stored in InnoDB. */

char*
ha_innobase::get_foreign_key_create_info(void)
/*==========================================*/
			/* out, own: character string in the form which
			can be inserted to the CREATE TABLE statement,
			MUST be freed with ::free_foreign_key_create_info */
{
	row_prebuilt_t* prebuilt = (row_prebuilt_t*)innobase_prebuilt;
4609
	char*	str	= 0;
unknown's avatar
unknown committed
4610

unknown's avatar
unknown committed
4611
	ut_a(prebuilt != NULL);
4612

unknown's avatar
unknown committed
4613 4614 4615 4616 4617 4618
	/* We do not know if MySQL can call this function before calling
	external_lock(). To be safe, update the thd of the current table
	handle. */

	update_thd(current_thd);

4619
	if (FILE* file = os_file_create_tmpfile()) {
4620
		long	flen;
unknown's avatar
unknown committed
4621

4622
		prebuilt->trx->op_info = (char*)"getting info on foreign keys";
unknown's avatar
unknown committed
4623

4624 4625 4626
		/* In case MySQL calls this in the middle of a SELECT query,
		release possible adaptive hash latch to avoid
		deadlocks of threads */
4627

4628
		trx_search_latch_release_if_reserved(prebuilt->trx);
unknown's avatar
unknown committed
4629

4630
		/* output the data to a temporary file */
4631 4632
		dict_print_info_on_foreign_keys(TRUE, file,
				prebuilt->trx, prebuilt->table);
4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655
		prebuilt->trx->op_info = (char*)"";

		flen = ftell(file);
		if(flen > 64000 - 1) {
			flen = 64000 - 1;
		}

		ut_ad(flen >= 0);

		/* allocate buffer for the string, and
		read the contents of the temporary file */

		str = my_malloc(flen + 1, MYF(0));

		if (str) {
			rewind(file);
			flen = fread(str, 1, flen, file);
			str[flen] = 0;
		}

		fclose(file);
	} else {
		/* unable to create temporary file */
unknown's avatar
unknown committed
4656
          	str = my_malloc(1, MYF(MY_ZEROFILL));
4657
	}
unknown's avatar
unknown committed
4658

unknown's avatar
Merge  
unknown committed
4659
  	return(str);
unknown's avatar
unknown committed
4660 4661 4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681
}

/***********************************************************************
Checks if a table is referenced by a foreign key. The MySQL manual states that
a REPLACE is either equivalent to an INSERT, or DELETE(s) + INSERT. Only a
delete is then allowed internally to resolve a duplicate key conflict in
REPLACE, not an update. */

uint
ha_innobase::referenced_by_foreign_key(void)
/*========================================*/
			/* out: > 0 if referenced by a FOREIGN KEY */
{
	row_prebuilt_t* prebuilt = (row_prebuilt_t*)innobase_prebuilt;

	/* Collapse the dictionary answer to exactly 1 or 0 */

	return(dict_table_referenced_by_foreign_key(prebuilt->table)
	       ? 1
	       : 0);
}
unknown's avatar
unknown committed
4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692

/***********************************************************************
Frees the foreign key create info for a table stored in InnoDB, if it is
non-NULL. */

void
ha_innobase::free_foreign_key_create_info(
/*======================================*/
	char*	str)	/* in, own: create info string to free  */
{
	if (str) {
4693
		my_free(str, MYF(0));
unknown's avatar
unknown committed
4694
	}
4695 4696
}

unknown's avatar
unknown committed
4697 4698 4699 4700 4701 4702 4703 4704
/***********************************************************************
Tells something additional to the handler about how to do things. */

int
ha_innobase::extra(
/*===============*/
			   /* out: 0 or error number */
	enum ha_extra_function operation)
unknown's avatar
unknown committed
4705 4706
                           /* in: HA_EXTRA_RETRIEVE_ALL_COLS or some
			   other flag */
unknown's avatar
unknown committed
4707 4708 4709 4710 4711 4712 4713 4714
{
	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;

	/* Warning: since it is not sure that MySQL calls external_lock
	before calling this function, the trx field in prebuilt can be
	obsolete! */

	switch (operation) {
unknown's avatar
unknown committed
4715 4716 4717 4718 4719 4720 4721 4722 4723 4724 4725 4726
                case HA_EXTRA_FLUSH:
                        if (prebuilt->blob_heap) {
                                row_mysql_prebuilt_free_blob_heap(prebuilt);
                        }
                        break;
                case HA_EXTRA_RESET:
                        if (prebuilt->blob_heap) {
                                row_mysql_prebuilt_free_blob_heap(prebuilt);
                        }
                        prebuilt->read_just_key = 0;
                        break;
  		case HA_EXTRA_RESET_STATE:
unknown's avatar
unknown committed
4727
	        	prebuilt->read_just_key = 0;
unknown's avatar
unknown committed
4728
    	        	break;
unknown's avatar
unknown committed
4729 4730 4731
		case HA_EXTRA_NO_KEYREAD:
    			prebuilt->read_just_key = 0;
    			break;
unknown's avatar
unknown committed
4732
	        case HA_EXTRA_RETRIEVE_ALL_COLS:
unknown's avatar
unknown committed
4733 4734 4735 4736 4737 4738 4739 4740
			prebuilt->hint_need_to_fetch_extra_cols
					= ROW_RETRIEVE_ALL_COLS;
			break;
	        case HA_EXTRA_RETRIEVE_PRIMARY_KEY:
			if (prebuilt->hint_need_to_fetch_extra_cols == 0) {
				prebuilt->hint_need_to_fetch_extra_cols
					= ROW_RETRIEVE_PRIMARY_KEY;
			}
unknown's avatar
unknown committed
4741 4742 4743 4744 4745 4746 4747 4748 4749 4750 4751
			break;
	        case HA_EXTRA_KEYREAD:
	        	prebuilt->read_just_key = 1;
	        	break;
		default:/* Do nothing */
			;
	}

	return(0);
}

unknown's avatar
unknown committed
4752
/**********************************************************************
unknown's avatar
unknown committed
4753 4754 4755 4756 4757
MySQL calls this function at the start of each SQL statement inside LOCK
TABLES. Inside LOCK TABLES the ::external_lock method does not work to
mark SQL statement borders. Note also a special case: if a temporary table
is created inside LOCK TABLES, MySQL has not called external_lock() at all
on that table. */
unknown's avatar
unknown committed
4758 4759

int
unknown's avatar
unknown committed
4760 4761
ha_innobase::start_stmt(
/*====================*/
unknown's avatar
unknown committed
4762 4763 4764 4765 4766 4767 4768 4769 4770 4771
	              /* out: 0 or error code */
	THD*    thd)  /* in: handle to the user thread */
{
	row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt;
	trx_t*		trx;

	update_thd(thd);

	trx = prebuilt->trx;

unknown's avatar
unknown committed
4772 4773 4774 4775 4776 4777 4778
	/* Here we release the search latch and the InnoDB thread FIFO ticket
	if they were reserved. They should have been released already at the
	end of the previous statement, but because inside LOCK TABLES the
	lock count method does not work to mark the end of a SELECT statement,
	that may not be the case. We MUST release the search latch before an
	INSERT, for example. */

unknown's avatar
unknown committed
4779 4780
	innobase_release_stat_resources(trx);

unknown's avatar
unknown committed
4781 4782 4783 4784 4785 4786 4787 4788
	if (trx->isolation_level <= TRX_ISO_READ_COMMITTED
	    						&& trx->read_view) {
	    	/* At low transaction isolation levels we let
		each consistent read set its own snapshot */

	    	read_view_close_for_mysql(trx);
	}

unknown's avatar
unknown committed
4789 4790
	auto_inc_counter_for_this_stat = 0;
	prebuilt->sql_stat_start = TRUE;
unknown's avatar
unknown committed
4791
	prebuilt->hint_need_to_fetch_extra_cols = 0;
unknown's avatar
unknown committed
4792
	prebuilt->read_just_key = 0;
unknown's avatar
unknown committed
4793

4794
	if (!prebuilt->mysql_has_locked) {
unknown's avatar
unknown committed
4795 4796 4797 4798 4799 4800
	        /* This handle is for a temporary table created inside
	        this same LOCK TABLES; since MySQL does NOT call external_lock
	        in this case, we must use x-row locks inside InnoDB to be
	        prepared for an update of a row */
	  
	        prebuilt->select_lock_type = LOCK_X;
unknown's avatar
unknown committed
4801 4802 4803 4804
	} else {
		if (trx->isolation_level != TRX_ISO_SERIALIZABLE
		    && thd->lex->sql_command == SQLCOM_SELECT
		    && thd->lex->lock_option == TL_READ) {
unknown's avatar
unknown committed
4805
	
unknown's avatar
unknown committed
4806 4807 4808 4809 4810 4811 4812 4813 4814 4815 4816 4817 4818 4819 4820 4821 4822 4823 4824 4825 4826 4827 4828 4829 4830 4831 4832 4833 4834 4835
			/* For other than temporary tables, we obtain
			no lock for consistent read (plain SELECT). */

			prebuilt->select_lock_type = LOCK_NONE;
		} else {
			/* Not a consistent read: restore the
			select_lock_type value. The value of
			stored_select_lock_type was decided in:
			1) ::store_lock(),
			2) ::external_lock(), and
			3) ::init_table_handle_for_HANDLER(). */

			prebuilt->select_lock_type =
				prebuilt->stored_select_lock_type;
		}

		if (prebuilt->stored_select_lock_type != LOCK_S
		    && prebuilt->stored_select_lock_type != LOCK_X) {
			fprintf(stderr,
"InnoDB: Error: stored_select_lock_type is %lu inside ::start_stmt()!\n",
			prebuilt->stored_select_lock_type);

			/* Set the value to LOCK_X: this is just fault
			tolerance, we do not know what the correct value
			should be! */

			prebuilt->select_lock_type = LOCK_X;
		}
	}

unknown's avatar
unknown committed
4836
	/* Set the MySQL flag to mark that there is an active transaction */
unknown's avatar
unknown committed
4837
	thd->transaction.all.innodb_active_trans = 1;
unknown's avatar
unknown committed
4838 4839

	return(0);
unknown's avatar
unknown committed
4840 4841
}

unknown's avatar
unknown committed
4842 4843 4844 4845 4846 4847 4848 4849 4850 4851 4852
/**********************************************************************
Translates a MySQL transaction isolation level code into the corresponding
InnoDB isolation level code. An unknown MySQL code triggers ut_a(0). */
inline
ulint
innobase_map_isolation_level(
/*=========================*/
					/* out: InnoDB isolation level */
	enum_tx_isolation	iso)	/* in: MySQL isolation level code */
{
	if (iso == ISO_REPEATABLE_READ) {

		return(TRX_ISO_REPEATABLE_READ);
	}

	if (iso == ISO_READ_COMMITTED) {

		return(TRX_ISO_READ_COMMITTED);
	}

	if (iso == ISO_SERIALIZABLE) {

		return(TRX_ISO_SERIALIZABLE);
	}

	if (iso == ISO_READ_UNCOMMITTED) {

		return(TRX_ISO_READ_UNCOMMITTED);
	}

	/* No known MySQL isolation level matched */
	ut_a(0);

	return(0);
}
	
unknown's avatar
unknown committed
4860 4861
/**********************************************************************
As MySQL will execute an external lock for every new table it uses when it
unknown's avatar
unknown committed
4862 4863 4864
starts to process an SQL statement (an exception is when MySQL calls
start_stmt for the handle) we can use this function to store the pointer to
the THD in the handle. We will also use this function to communicate
unknown's avatar
unknown committed
4865 4866 4867 4868 4869 4870 4871
to InnoDB that a new SQL statement has started and that we must store a
savepoint to our transaction handle, so that we are able to roll back
the SQL statement in case of an error. */

int
ha_innobase::external_lock(
/*=======================*/
4872
			        /* out: 0 */
unknown's avatar
unknown committed
4873 4874 4875 4876 4877 4878 4879
	THD*	thd,		/* in: handle to the user thread */
	int 	lock_type)	/* in: lock type */
{
	row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt;
	trx_t*		trx;

  	DBUG_ENTER("ha_innobase::external_lock");
unknown's avatar
unknown committed
4880
	DBUG_PRINT("enter",("lock_type: %d", lock_type));
unknown's avatar
unknown committed
4881 4882 4883

	update_thd(thd);

4884
 	if (prebuilt->table->ibd_file_missing && !current_thd->tablespace_op) {
unknown's avatar
unknown committed
4885 4886 4887 4888 4889 4890 4891 4892 4893
	        ut_print_timestamp(stderr);
	        fprintf(stderr, "  InnoDB error:\n"
"MySQL is trying to use a table handle but the .ibd file for\n"
"table %s does not exist.\n"
"Have you deleted the .ibd file from the database directory under\n"
"the MySQL datadir, or have you used DISCARD TABLESPACE?\n"
"Look from section 15.1 of http://www.innodb.com/ibman.html\n"
"how you can resolve the problem.\n",
				prebuilt->table->name);
unknown's avatar
unknown committed
4894
		DBUG_RETURN(HA_ERR_CRASHED);
unknown's avatar
unknown committed
4895 4896
	}

unknown's avatar
unknown committed
4897 4898 4899
	trx = prebuilt->trx;

	prebuilt->sql_stat_start = TRUE;
unknown's avatar
unknown committed
4900
	prebuilt->hint_need_to_fetch_extra_cols = 0;
unknown's avatar
unknown committed
4901 4902 4903 4904 4905 4906 4907 4908

	prebuilt->read_just_key = 0;

	if (lock_type == F_WRLCK) {

		/* If this is a SELECT, then it is in UPDATE TABLE ...
		or SELECT ... FOR UPDATE */
		prebuilt->select_lock_type = LOCK_X;
unknown's avatar
unknown committed
4909
		prebuilt->stored_select_lock_type = LOCK_X;
unknown's avatar
unknown committed
4910 4911 4912
	}

	if (lock_type != F_UNLCK) {
unknown's avatar
unknown committed
4913
		/* MySQL is setting a new table lock */
unknown's avatar
unknown committed
4914

unknown's avatar
unknown committed
4915 4916
		/* Set the MySQL flag to mark that there is an active
		transaction */
unknown's avatar
unknown committed
4917
		thd->transaction.all.innodb_active_trans = 1;
unknown's avatar
unknown committed
4918

unknown's avatar
unknown committed
4919
		trx->n_mysql_tables_in_use++;
4920
		prebuilt->mysql_has_locked = TRUE;
unknown's avatar
unknown committed
4921

unknown's avatar
unknown committed
4922 4923
		if (trx->n_mysql_tables_in_use == 1) {
		        trx->isolation_level = innobase_map_isolation_level(
unknown's avatar
unknown committed
4924 4925
						(enum_tx_isolation)
						thd->variables.tx_isolation);
unknown's avatar
unknown committed
4926
		}
unknown's avatar
unknown committed
4927 4928

		if (trx->isolation_level == TRX_ISO_SERIALIZABLE
unknown's avatar
unknown committed
4929 4930
		    && prebuilt->select_lock_type == LOCK_NONE
		    && (thd->options
unknown's avatar
unknown committed
4931
				& (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) {
unknown's avatar
unknown committed
4932

unknown's avatar
unknown committed
4933 4934
			/* To get serializable execution, we let InnoDB
			conceptually add 'LOCK IN SHARE MODE' to all SELECTs
unknown's avatar
unknown committed
4935 4936 4937 4938 4939
			which otherwise would have been consistent reads. An
			exception is consistent reads in the AUTOCOMMIT=1 mode:
			we know that they are read-only transactions, and they
			can be serialized also if performed as consistent
			reads. */
unknown's avatar
unknown committed
4940 4941 4942 4943

			prebuilt->select_lock_type = LOCK_S;
		}

unknown's avatar
unknown committed
4944
		if (prebuilt->select_lock_type != LOCK_NONE) {
4945 4946 4947 4948 4949 4950 4951 4952 4953 4954
			if (thd->in_lock_tables) {
				ulint	error;
				error = row_lock_table_for_mysql(prebuilt);

				if (error != DB_SUCCESS) {
					error = convert_error_code_to_mysql(
						error, user_thd);
					DBUG_RETURN(error);
				}
			}
unknown's avatar
unknown committed
4955 4956 4957 4958

		  	trx->mysql_n_tables_locked++;
		}

4959
		DBUG_RETURN(0);
unknown's avatar
unknown committed
4960
	}
unknown's avatar
unknown committed
4961

unknown's avatar
unknown committed
4962
	/* MySQL is releasing a table lock */
unknown's avatar
unknown committed
4963

unknown's avatar
unknown committed
4964 4965 4966
	trx->n_mysql_tables_in_use--;
	prebuilt->mysql_has_locked = FALSE;
	auto_inc_counter_for_this_stat = 0;
unknown's avatar
unknown committed
4967 4968
	if (trx->n_lock_table_exp) {
		row_unlock_tables_for_mysql(trx);
4969
	}
4970

unknown's avatar
unknown committed
4971 4972
	/* If the MySQL lock count drops to zero we know that the current SQL
	statement has ended */
unknown's avatar
unknown committed
4973

unknown's avatar
unknown committed
4974
	if (trx->n_mysql_tables_in_use == 0) {
unknown's avatar
unknown committed
4975

unknown's avatar
unknown committed
4976 4977 4978
	        trx->mysql_n_tables_locked = 0;
		prebuilt->used_in_HANDLER = FALSE;
			
unknown's avatar
unknown committed
4979 4980 4981 4982
		/* Release a possible FIFO ticket and search latch. Since we
		may reserve the kernel mutex, we have to release the search
		system latch first to obey the latching order. */

unknown's avatar
unknown committed
4983
		innobase_release_stat_resources(trx);
unknown's avatar
unknown committed
4984

unknown's avatar
unknown committed
4985
		if (!(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) {
unknown's avatar
unknown committed
4986 4987 4988 4989
			if (thd->transaction.all.innodb_active_trans != 0) {
		    	        innobase_commit(thd, trx);
			}
		} else {
unknown's avatar
unknown committed
4990 4991 4992
			if (trx->isolation_level <= TRX_ISO_READ_COMMITTED
	    						&& trx->read_view) {

unknown's avatar
unknown committed
4993
				/* At low transaction isolation levels we let
unknown's avatar
unknown committed
4994 4995
				each consistent read set its own snapshot */

unknown's avatar
unknown committed
4996
				read_view_close_for_mysql(trx);
unknown's avatar
unknown committed
4997
			}
unknown's avatar
unknown committed
4998 4999 5000
		}
	}

5001
	DBUG_RETURN(0);
unknown's avatar
unknown committed
5002 5003
}

unknown's avatar
unknown committed
5004
/****************************************************************************
Implements the SHOW INNODB STATUS command. Sends the output of the InnoDB
Monitor to the client as a single-column ("Status"), single-row result set. */

int
innodb_show_status(
/*===============*/
			/* out: 0 on success, -1 if InnoDB is disabled,
			memory allocation fails, or sending to the client
			fails */
	THD*	thd)	/* in: the MySQL query thread of the caller */
{
	Protocol*	protocol = thd->protocol;
	trx_t*		trx;

	DBUG_ENTER("innodb_show_status");

	if (have_innodb != SHOW_OPTION_YES) {
		my_message(ER_NOT_SUPPORTED_YET,
          "Cannot call SHOW INNODB STATUS because skip-innodb is defined",
			   MYF(0));
		DBUG_RETURN(-1);
	}

	trx = check_trx_exists(thd);

	innobase_release_stat_resources(trx);

	/* We let the InnoDB Monitor to output at most 64000 bytes of text. */

	long	flen;
	char*	str;

	mutex_enter_noninline(&srv_monitor_file_mutex);
	rewind(srv_monitor_file);
	srv_printf_innodb_monitor(srv_monitor_file);
	flen = ftell(srv_monitor_file);
	os_file_set_eof(srv_monitor_file);

	if (flen < 0) {
		/* ftell() failed. A negative length must never reach
		my_malloc() or fread() below: converted to an unsigned
		size it would ask fread() to overrun the buffer. Send an
		empty status text instead. */

		flen = 0;
	} else if (flen > 64000 - 1) {
		flen = 64000 - 1;
	}

	/* allocate buffer for the string, and
	read the contents of the temporary file */

	if (!(str = my_malloc(flen + 1, MYF(0))))
	{
		mutex_exit_noninline(&srv_monitor_file_mutex);
		DBUG_RETURN(-1);
	}

	rewind(srv_monitor_file);

	/* From here on flen is the number of bytes actually read */
	flen = fread(str, 1, flen, srv_monitor_file);

	mutex_exit_noninline(&srv_monitor_file_mutex);

	List<Item> field_list;

	field_list.push_back(new Item_empty_string("Status", flen));

	if (protocol->send_fields(&field_list, 1)) {

		my_free(str, MYF(0));

		DBUG_RETURN(-1);
	}

	protocol->prepare_for_resend();
	protocol->store(str, flen, system_charset_info);

	/* The buffer is not used after store(); free it before write()
	so that the write failure path below has nothing to clean up. */
	my_free(str, MYF(0));

	if (protocol->write()) {
		DBUG_RETURN(-1);
	}

	send_eof(thd);
	DBUG_RETURN(0);
}

5081 5082 5083 5084 5085
/****************************************************************************
 Handling the shared INNOBASE_SHARE structure that is needed to provide table
 locking.
****************************************************************************/

5086
/* Key-extraction function for INNOBASE_SHARE entries: the key is the table
name stored in the share. Writes the key length to *length and returns a
pointer to the key bytes. The third argument is unused. */
static mysql_byte* innobase_get_key(INNOBASE_SHARE *share,uint *length,
			      my_bool not_used __attribute__((unused)))
{
  mysql_byte *key= (mysql_byte*) share->table_name;

  *length= share->table_name_length;
  return key;
}

/* Looks up the INNOBASE_SHARE for table_name in innobase_open_tables,
creating and registering a new one if none exists yet, and increments its
use count. All of this happens under innobase_mutex.

Returns the share, or 0 if a new share could not be allocated or could not
be inserted into the hash. */
static INNOBASE_SHARE *get_share(const char *table_name)
{
  INNOBASE_SHARE *share;
  uint length=(uint) strlen(table_name);

  pthread_mutex_lock(&innobase_mutex);

  share=(INNOBASE_SHARE*) hash_search(&innobase_open_tables,
				      (mysql_byte*) table_name,
				      length);
  if (!share)
  {
    /* The table name is stored in the same allocation, right after
    the struct itself */
    share=(INNOBASE_SHARE *) my_malloc(sizeof(*share)+length+1,
				       MYF(MY_WME | MY_ZEROFILL));
    if (!share)
    {
      /* Out of memory: must not fall through to the use_count
      increment below with a NULL share */
      pthread_mutex_unlock(&innobase_mutex);
      return 0;
    }

    share->table_name_length=length;
    share->table_name=(char*) (share+1);
    strmov(share->table_name,table_name);

    if (my_hash_insert(&innobase_open_tables, (mysql_byte*) share))
    {
      pthread_mutex_unlock(&innobase_mutex);
      my_free((gptr) share,0);
      return 0;
    }

    thr_lock_init(&share->lock);
    pthread_mutex_init(&share->mutex,MY_MUTEX_INIT_FAST);
  }

  share->use_count++;
  pthread_mutex_unlock(&innobase_mutex);
  return share;
}

/* Drops one reference to the share under innobase_mutex. When the use count
reaches zero the share is removed from innobase_open_tables, its lock and
mutex are destroyed, and its memory is freed. */
static void free_share(INNOBASE_SHARE *share)
{
  pthread_mutex_lock(&innobase_mutex);

  share->use_count--;

  if (share->use_count == 0)
  {
    /* Last user gone: unregister and destroy the share */
    hash_delete(&innobase_open_tables, (mysql_byte*) share);
    thr_lock_delete(&share->lock);
    pthread_mutex_destroy(&share->mutex);
    my_free((gptr) share, MYF(0));
  }

  pthread_mutex_unlock(&innobase_mutex);
}
5135 5136

/*********************************************************************
unknown's avatar
unknown committed
5137
Converts a MySQL table lock stored in the 'lock' field of the handle to
unknown's avatar
unknown committed
5138 5139 5140 5141 5142 5143
a proper type before storing pointer to the lock into an array of pointers.
MySQL also calls this if it wants to reset some table locks to a not-locked
state during the processing of an SQL query. An example is that during a
SELECT the read lock is released early on the 'const' tables where we only
fetch one row. MySQL does not call this when it releases all locks at the
end of an SQL statement. */
5144 5145 5146 5147 5148 5149 5150 5151 5152 5153 5154 5155 5156 5157 5158 5159 5160

THR_LOCK_DATA**
ha_innobase::store_lock(
/*====================*/
						/* out: pointer to the next
						element in the 'to' array */
	THD*			thd,		/* in: user thread handle */
	THR_LOCK_DATA**		to,		/* in: pointer to an array
						of pointers to lock structs;
						pointer to the 'lock' field
						of current handle is stored
						next to this array */
	enum thr_lock_type 	lock_type)	/* in: lock type to store in
						'lock' */
{
	row_prebuilt_t* prebuilt	= (row_prebuilt_t*) innobase_prebuilt;

unknown's avatar
unknown committed
5161 5162 5163
	if ((lock_type == TL_READ && thd->in_lock_tables) ||
	    (lock_type == TL_READ_HIGH_PRIORITY && thd->in_lock_tables) ||
	    lock_type == TL_READ_WITH_SHARED_LOCKS ||
unknown's avatar
unknown committed
5164
	    lock_type == TL_READ_NO_INSERT) {
unknown's avatar
unknown committed
5165 5166 5167 5168 5169
		/* The OR cases above are in this order:
		1) MySQL is doing LOCK TABLES ... READ LOCAL, or
		2) (we do not know when TL_READ_HIGH_PRIORITY is used), or
		3) this is a SELECT ... IN SHARE MODE, or
		4) we are doing a complex SQL statement like
unknown's avatar
unknown committed
5170
		INSERT INTO ... SELECT ... and the logical logging (MySQL
unknown's avatar
unknown committed
5171 5172
		binlog) requires the use of a locking read, or
		MySQL is doing LOCK TABLES ... READ. */
unknown's avatar
unknown committed
5173

5174
		prebuilt->select_lock_type = LOCK_S;
unknown's avatar
unknown committed
5175 5176
		prebuilt->stored_select_lock_type = LOCK_S;

unknown's avatar
unknown committed
5177 5178 5179 5180 5181 5182 5183
	} else if (lock_type != TL_IGNORE) {

	        /* In ha_berkeley.cc there is a comment that MySQL
	        may in exceptional cases call this with TL_IGNORE also
	        when it is NOT going to release the lock. */

	        /* We set possible LOCK_X value in external_lock, not yet
5184
		here even if this would be SELECT ... FOR UPDATE */
unknown's avatar
unknown committed
5185

5186
		prebuilt->select_lock_type = LOCK_NONE;
unknown's avatar
unknown committed
5187
		prebuilt->stored_select_lock_type = LOCK_NONE;
5188 5189 5190 5191
	}

	if (lock_type != TL_IGNORE && lock.type == TL_UNLOCK) {

unknown's avatar
unknown committed
5192 5193
    		/* If we are not doing a LOCK TABLE or DISCARD/IMPORT
		TABLESPACE, then allow multiple writers */
5194 5195

    		if ((lock_type >= TL_WRITE_CONCURRENT_INSERT &&
unknown's avatar
unknown committed
5196 5197
	 	    lock_type <= TL_WRITE) && !thd->in_lock_tables
		    && !thd->tablespace_op) {
5198 5199 5200 5201

      			lock_type = TL_WRITE_ALLOW_WRITE;
      		}

unknown's avatar
unknown committed
5202 5203 5204 5205 5206 5207 5208 5209 5210 5211
		/* In queries of type INSERT INTO t1 SELECT ... FROM t2 ...
		MySQL would use the lock TL_READ_NO_INSERT on t2, and that
		would conflict with TL_WRITE_ALLOW_WRITE, blocking all inserts
		to t2. Convert the lock to a normal read lock to allow
		concurrent inserts to t2. */
      		
		if (lock_type == TL_READ_NO_INSERT && !thd->in_lock_tables) {
			lock_type = TL_READ;
		}
		
5212 5213 5214 5215
 		lock.type=lock_type;
  	}

  	*to++= &lock;
5216

5217 5218 5219
	return(to);
}

5220
/***********************************************************************
unknown's avatar
unknown committed
5221 5222
This function initializes the auto-inc counter if it has not been
initialized yet. This function does not change the value of the auto-inc
unknown's avatar
 
unknown committed
5223
counter if it already has been initialized. In paramete ret returns
unknown's avatar
unknown committed
5224
the value of the auto-inc counter. */
5225

unknown's avatar
unknown committed
5226 5227 5228 5229 5230 5231
int
ha_innobase::innobase_read_and_init_auto_inc(
/*=========================================*/
				/* out: 0 or error code: deadlock or
				lock wait timeout */
	longlong*	ret)	/* out: auto-inc value */
5232
{
unknown's avatar
unknown committed
5233
  	row_prebuilt_t* prebuilt	= (row_prebuilt_t*) innobase_prebuilt;
unknown's avatar
unknown committed
5234
    	longlong        auto_inc;
unknown's avatar
unknown committed
5235
  	int     	error;
5236

unknown's avatar
unknown committed
5237
  	ut_a(prebuilt);
unknown's avatar
unknown committed
5238 5239
	ut_a(prebuilt->trx ==
		(trx_t*) current_thd->transaction.all.innobase_tid);
unknown's avatar
unknown committed
5240 5241
	ut_a(prebuilt->table);
	
unknown's avatar
unknown committed
5242 5243 5244 5245 5246
	/* In case MySQL calls this in the middle of a SELECT query, release
	possible adaptive hash latch to avoid deadlocks of threads */

	trx_search_latch_release_if_reserved(prebuilt->trx);

unknown's avatar
unknown committed
5247
	auto_inc = dict_table_autoinc_read(prebuilt->table);
unknown's avatar
unknown committed
5248

unknown's avatar
unknown committed
5249 5250 5251 5252 5253 5254
	if (auto_inc != 0) {
		/* Already initialized */
		*ret = auto_inc;
	
		return(0);
	}
5255

unknown's avatar
unknown committed
5256
	error = row_lock_table_autoinc_for_mysql(prebuilt);
unknown's avatar
unknown committed
5257

unknown's avatar
unknown committed
5258 5259
	if (error != DB_SUCCESS) {
		error = convert_error_code_to_mysql(error, user_thd);
5260

unknown's avatar
unknown committed
5261 5262
		goto func_exit;
	}	
unknown's avatar
unknown committed
5263

unknown's avatar
unknown committed
5264 5265
	/* Check again if someone has initialized the counter meanwhile */
	auto_inc = dict_table_autoinc_read(prebuilt->table);
unknown's avatar
unknown committed
5266

unknown's avatar
unknown committed
5267 5268 5269 5270
	if (auto_inc != 0) {
		*ret = auto_inc;
	
		return(0);
unknown's avatar
unknown committed
5271
	}
5272

unknown's avatar
unknown committed
5273 5274 5275 5276 5277 5278 5279 5280 5281
  	(void) extra(HA_EXTRA_KEYREAD);
  	index_init(table->next_number_index);

	/* We use an exclusive lock when we read the max key value from the
  	auto-increment column index. This is because then build_template will
  	advise InnoDB to fetch all columns. In SHOW TABLE STATUS the query
  	id of the auto-increment column is not changed, and previously InnoDB
  	did not fetch it, causing SHOW TABLE STATUS to show wrong values
  	for the autoinc column. */
5282

unknown's avatar
unknown committed
5283
  	prebuilt->select_lock_type = LOCK_X;
5284

unknown's avatar
unknown committed
5285 5286 5287
  	/* Play safe and also give in another way the hint to fetch
  	all columns in the key: */
  	
unknown's avatar
unknown committed
5288
	prebuilt->hint_need_to_fetch_extra_cols = ROW_RETRIEVE_ALL_COLS;
5289

unknown's avatar
unknown committed
5290
	prebuilt->trx->mysql_n_tables_locked += 1;
5291
  
unknown's avatar
unknown committed
5292
	error = index_last(table->record[1]);
5293

unknown's avatar
unknown committed
5294
  	if (error) {
unknown's avatar
unknown committed
5295 5296 5297 5298 5299 5300 5301 5302 5303 5304 5305
		if (error == HA_ERR_END_OF_FILE) {
			/* The table was empty, initialize to 1 */
			auto_inc = 1;

			error = 0;
		} else {
			/* Deadlock or a lock wait timeout */
  			auto_inc = -1;

  			goto func_exit;
  		}
unknown's avatar
unknown committed
5306
  	} else {
unknown's avatar
unknown committed
5307 5308
		/* Initialize to max(col) + 1 */
    		auto_inc = (longlong) table->next_number_field->
unknown's avatar
unknown committed
5309 5310
                        	val_int_offset(table->rec_buff_length) + 1;
  	}
5311

unknown's avatar
unknown committed
5312 5313 5314
	dict_table_autoinc_initialize(prebuilt->table, auto_inc);

func_exit:
unknown's avatar
unknown committed
5315
  	(void) extra(HA_EXTRA_NO_KEYREAD);
5316

unknown's avatar
unknown committed
5317 5318 5319 5320 5321 5322 5323 5324 5325 5326 5327 5328 5329 5330 5331 5332 5333 5334 5335 5336 5337 5338 5339 5340 5341 5342 5343 5344
	index_end();

	*ret = auto_inc;

  	return(error);
}

/***********************************************************************
Returns the value of the auto-inc counter, initializing the counter first
if that has not been done yet. An already initialized counter is left
unchanged. */

longlong
ha_innobase::get_auto_increment()
/*=============================*/
                         /* out: auto-increment column value, -1 if error
                         (deadlock or lock wait timeout) */
{
	longlong	counter_value;

	if (innobase_read_and_init_auto_inc(&counter_value) != 0) {
		/* Any error is reported to the caller as -1 */

		return(-1);
	}

	return(counter_value);
}

5349
/***********************************************************************
unknown's avatar
unknown committed
5350
This function stores the binlog offset and flushes logs. */
5351 5352 5353

void 
innobase_store_binlog_offset_and_flush_log(
unknown's avatar
unknown committed
5354 5355 5356 5357
/*=======================================*/
    char 	*binlog_name,	/* in: binlog name */
    longlong 	offset)		/* in: binlog offset */
{
5358 5359 5360 5361 5362 5363 5364 5365
	mtr_t mtr;
	
	assert(binlog_name != NULL);

	/* Start a mini-transaction */
        mtr_start_noninline(&mtr); 

	/* Update the latest MySQL binlog name and offset info
unknown's avatar
unknown committed
5366
        in trx sys header */
5367 5368 5369 5370 5371 5372 5373 5374 5375 5376 5377 5378

        trx_sys_update_mysql_binlog_offset(
            binlog_name,
            offset,
            TRX_SYS_MYSQL_LOG_INFO, &mtr);

        /* Commits the mini-transaction */
        mtr_commit(&mtr);
        
	/* Syncronous flush of the log buffer to disk */
	log_buffer_flush_to_disk();
}
unknown's avatar
unknown committed
5379

unknown's avatar
unknown committed
5380 5381
char*
ha_innobase::get_mysql_bin_log_name()
unknown's avatar
unknown committed
5382
{
unknown's avatar
unknown committed
5383
	return(trx_sys_mysql_bin_log_name);
unknown's avatar
unknown committed
5384 5385
}

unknown's avatar
unknown committed
5386 5387
/* Returns the value of trx_sys_mysql_bin_log_pos converted to ulonglong. */
ulonglong
ha_innobase::get_mysql_bin_log_pos()
{
	/* trx_sys_mysql_bin_log_pos is ib_longlong, a typedef for a 64-bit
	integer (__int64 or longlong), so converting it to ulonglong is
	fine. */

	ulonglong	log_pos = trx_sys_mysql_bin_log_pos;

	return(log_pos);
}

5395
extern "C" {
5396
/**********************************************************************
unknown's avatar
unknown committed
5397 5398 5399 5400 5401 5402 5403
This function is used to find the storage length in bytes of the first n
characters for prefix indexes using a multibyte character set. The function
finds charset information and returns length of prefix_len characters in the
index field in bytes.

NOTE: the prototype of this function is copied to data0type.c! If you change
this function, you MUST change also data0type.c! */
5404

unknown's avatar
unknown committed
5405 5406 5407 5408 5409
ulint
innobase_get_at_most_n_mbchars(
/*===========================*/
				/* out: number of bytes occupied by the first
				n characters */
5410
	ulint charset_id,	/* in: character set id */
unknown's avatar
unknown committed
5411 5412 5413 5414 5415
	ulint prefix_len,	/* in: prefix length in bytes of the index
				(this has to be divided by mbmaxlen to get the
				number of CHARACTERS n in the prefix) */
	ulint data_len,         /* in: length of the string in bytes */
	const char* str)	/* in: character string */
5416
{
5417
	ulint char_length;	/* character length in bytes */
unknown's avatar
unknown committed
5418
	ulint n_chars;		/* number of characters in prefix */
5419
	CHARSET_INFO* charset;	/* charset used in the field */
5420

unknown's avatar
unknown committed
5421
	charset = get_charset(charset_id, MYF(MY_WME));
5422

5423 5424
	ut_ad(charset);
	ut_ad(charset->mbmaxlen);
5425

unknown's avatar
unknown committed
5426
	/* Calculate how many characters at most the prefix index contains */
5427

unknown's avatar
unknown committed
5428
	n_chars = prefix_len / charset->mbmaxlen;
5429

unknown's avatar
unknown committed
5430 5431 5432
	/* If the charset is multi-byte, then we must find the length of the
	first at most n chars in the string. If the string contains less
	characters than n, then we return the length to the end of the last
unknown's avatar
unknown committed
5433
	character. */
5434

unknown's avatar
unknown committed
5435 5436
	if (charset->mbmaxlen > 1) {
		/* my_charpos() returns the byte length of the first n_chars
unknown's avatar
unknown committed
5437 5438 5439 5440 5441 5442 5443 5444 5445 5446 5447 5448 5449 5450 5451 5452
		characters, or a value bigger than the length of str, if
		there were not enough full characters in str.

		Why does the code below work:
		Suppose that we are looking for n UTF-8 characters.

		1) If the string is long enough, then the prefix contains at
		least n complete UTF-8 characters + maybe some extra
		characters + an incomplete UTF-8 character. No problem in
		this case. The function returns the pointer to the
		end of the nth character.

		2) If the string is not long enough, then the string contains
		the complete value of a column, that is, only complete UTF-8
		characters, and we can store in the column prefix index the
		whole string. */
unknown's avatar
unknown committed
5453

unknown's avatar
unknown committed
5454 5455 5456 5457 5458
		char_length = my_charpos(charset, str,
						str + data_len, n_chars);
		if (char_length > data_len) {
			char_length = data_len;
		}		
unknown's avatar
unknown committed
5459
	} else {
unknown's avatar
unknown committed
5460 5461 5462 5463 5464
		if (data_len < prefix_len) {
			char_length = data_len;
		} else {
			char_length = prefix_len;
		}
5465
	}
5466

unknown's avatar
unknown committed
5467
	return(char_length);
5468 5469 5470
}
}

5471
#endif /* HAVE_INNOBASE_DB */