ha_innodb.cc 165 KB
Newer Older
1
/* Copyright (C) 2000 MySQL AB & Innobase Oy
2

3 4 5 6
   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.
7

8 9 10 11
   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.
12

13 14 15 16
   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
17
/* This file defines the InnoDB handler: the interface between MySQL and
18 19 20
InnoDB
NOTE: You can only use noninlined InnoDB functions in this file, because we
have disabled the InnoDB inlining in this file. */
21

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
22
/* TODO list for the InnoDB handler in 4.1:
23 24 25
  - Remove the flag innodb_active_trans from thd and replace it with a
    function call innodb_active_trans(thd), which looks at the InnoDB
    trx struct state field
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
26 27 28
  - Find out what kind of problems the OS X case-insensitivity causes to
    table and database names; should we 'normalize' the names like we do
    in Windows?
29
*/
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
30

31
#ifdef USE_PRAGMA_IMPLEMENTATION
32 33
#pragma implementation				// gcc: Class implementation
#endif
34 35

#include "mysql_priv.h"
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
36
#include "slave.h"
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
37

38 39 40 41
#ifdef HAVE_INNOBASE_DB
#include <m_ctype.h>
#include <hash.h>
#include <myisampack.h>
marko@hundin.mysql.fi's avatar
marko@hundin.mysql.fi committed
42
#include <mysys_err.h>
43
#include <my_sys.h>
44

45 46
#define MAX_ULONG_BIT ((ulong) 1 << (sizeof(ulong)*8-1))

47
#include "ha_innodb.h"
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
48

49
pthread_mutex_t innobase_mutex;
50
bool innodb_inited= 0;
51

52
/* Store MySQL definition of 'byte': in Linux it is char while InnoDB
53 54 55
uses unsigned char; the header univ.i which we include next defines
'byte' as a macro which expands to 'unsigned char' */

56
typedef byte	mysql_byte;
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
57

58 59
#define INSIDE_HA_INNOBASE_CC

60
/* Include necessary InnoDB headers */
61
extern "C" {
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
62
#include "../innobase/include/univ.i"
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
63
#include "../innobase/include/os0file.h"
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
64
#include "../innobase/include/os0thread.h"
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
65 66 67 68
#include "../innobase/include/srv0start.h"
#include "../innobase/include/srv0srv.h"
#include "../innobase/include/trx0roll.h"
#include "../innobase/include/trx0trx.h"
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
69
#include "../innobase/include/trx0sys.h"
70
#include "../innobase/include/mtr0mtr.h"
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
71 72 73 74 75
#include "../innobase/include/row0ins.h"
#include "../innobase/include/row0mysql.h"
#include "../innobase/include/row0sel.h"
#include "../innobase/include/row0upd.h"
#include "../innobase/include/log0log.h"
76
#include "../innobase/include/lock0lock.h"
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
77 78 79
#include "../innobase/include/dict0crea.h"
#include "../innobase/include/btr0cur.h"
#include "../innobase/include/btr0btr.h"
80
#include "../innobase/include/fsp0fsp.h"
81
#include "../innobase/include/sync0sync.h"
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
82
#include "../innobase/include/fil0fil.h"
83 84 85 86 87
}

#define HA_INNOBASE_ROWS_IN_TABLE 10000 /* to get optimization right */
#define HA_INNOBASE_RANGE_COUNT	  100

88 89
uint 	innobase_init_flags 	= 0;
ulong 	innobase_cache_size 	= 0;
90

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
91 92 93
/* The default values for the following, type long, start-up parameters
are declared in mysqld.cc: */

94
long innobase_mirrored_log_groups, innobase_log_files_in_group,
95
     innobase_log_file_size, innobase_log_buffer_size,
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
96 97
     innobase_buffer_pool_awe_mem_mb,
     innobase_buffer_pool_size, innobase_additional_mem_pool_size,
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
98
     innobase_file_io_threads, innobase_lock_wait_timeout,
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
99
     innobase_thread_concurrency, innobase_force_recovery,
100
     innobase_open_files;
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
101

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
102 103
/* The default values for the following char* start-up parameters
are determined in innobase_init below: */
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
104
  
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
105
char*	innobase_data_home_dir			= NULL;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
106
char*	innobase_data_file_path 		= NULL;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
107
char*	innobase_log_group_home_dir		= NULL;
marko@hundin.mysql.fi's avatar
marko@hundin.mysql.fi committed
108
char*	innobase_log_arch_dir			= NULL;/* unused */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
109 110
/* The following has a misleading name: starting from 4.0.5, this also
affects Windows: */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
111 112 113 114 115
char*	innobase_unix_file_flush_method		= NULL;

/* Below we have boolean-valued start-up parameters, and their default
values */

monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
116
uint	innobase_flush_log_at_trx_commit	= 1;
marko@hundin.mysql.fi's avatar
marko@hundin.mysql.fi committed
117
my_bool innobase_log_archive			= FALSE;/* unused */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
118 119
my_bool	innobase_use_native_aio			= FALSE;
my_bool	innobase_fast_shutdown			= TRUE;
120 121 122
my_bool innobase_very_fast_shutdown		= FALSE; /* this can be set to
							 1 just prior calling
							 innobase_end() */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
123
my_bool	innobase_file_per_table			= FALSE;
124
my_bool innobase_locks_unsafe_for_binlog        = FALSE;
125
my_bool innobase_create_status_file		= FALSE;
126

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
127
static char *internal_innobase_data_file_path	= NULL;
128

129
/* The following counter is used to convey information to InnoDB
130 131 132 133 134
about server activity: in selects it is not sensible to call
srv_active_wake_master_thread after each fetch or search, we only do
it every INNOBASE_WAKE_INTERVAL'th step. */

#define INNOBASE_WAKE_INTERVAL	32
135
ulong	innobase_active_counter	= 0;
136 137 138

char*	innobase_home 	= NULL;

139 140
char    innodb_dummy_stmt_trx_handle = 'D';

monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
141
static HASH 	innobase_open_tables;
142

143 144 145 146
#ifdef __NETWARE__  	/* some special cleanup for NetWare */
bool nw_panic = FALSE;
#endif

147
static mysql_byte* innobase_get_key(INNOBASE_SHARE *share,uint *length,
148 149 150 151 152 153
			      my_bool not_used __attribute__((unused)));
static INNOBASE_SHARE *get_share(const char *table_name);
static void free_share(INNOBASE_SHARE *share);

/* General functions */

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187
/**********************************************************************
Calls srv_conc_enter_innodb() for trx, unless srv_thread_concurrency is
500 or more, in which case nothing is done. The test is made here, in an
inline function, to save some CPU. */
inline
void
innodb_srv_conc_enter_innodb(
/*=========================*/
	trx_t*	trx)	/* in: transaction handle */
{
	if (srv_thread_concurrency < 500) {
		srv_conc_enter_innodb(trx);
	}
}

/**********************************************************************
Calls srv_conc_exit_innodb() for trx, unless srv_thread_concurrency is
500 or more, in which case nothing is done. The test is made here, in an
inline function, to save some CPU. */
inline
void
innodb_srv_conc_exit_innodb(
/*========================*/
	trx_t*	trx)	/* in: transaction handle */
{
	if (srv_thread_concurrency < 500) {
		srv_conc_exit_innodb(trx);
	}
}

heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
188
/**********************************************************************
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
189
Releases possible search latch and InnoDB thread FIFO ticket. These should
190 191 192 193
be released at each SQL statement end, and also when mysqld passes the
control to the client. It does no harm to release these also in the middle
of an SQL statement. */
inline
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209
void
innobase_release_stat_resources(
/*============================*/
	trx_t*	trx)	/* in: transaction object */
{
	if (trx->has_search_latch) {
		trx_search_latch_release_if_reserved(trx);
	}

	if (trx->declared_to_be_inside_innodb) {
		/* Release our possible ticket in the FIFO */

		srv_conc_force_exit_innodb(trx);
	}
}

210 211 212 213 214 215 216 217 218 219 220 221 222
/************************************************************************
Call this function when mysqld passes control to the client. That is to
avoid deadlocks on the adaptive hash S-latch possibly held by thd. For more
documentation, see handler.cc. */

void
innobase_release_temporary_latches(
/*===============================*/
        void*   innobase_tid)
{
        innobase_release_stat_resources((trx_t*)innobase_tid);
}

223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239
/************************************************************************
Bumps innobase_active_counter by one, and on every
INNOBASE_WAKE_INTERVAL'th call invokes srv_active_wake_master_thread().
Use this when a single database operation may introduce a small need for
server utility activity, like checkpointing. */
inline
void
innobase_active_small(void)
/*=======================*/
{
	if ((++innobase_active_counter % INNOBASE_WAKE_INTERVAL) == 0) {
		srv_active_wake_master_thread();
	}
}

240
/************************************************************************
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
241 242 243
Converts an InnoDB error code to a MySQL error code and also tells to MySQL
about a possible transaction rollback inside InnoDB caused by a lock wait
timeout or a deadlock. */
244 245 246 247 248
static
int
convert_error_code_to_mysql(
/*========================*/
			/* out: MySQL error code */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
249 250
	int	error,	/* in: InnoDB error code */
	THD*	thd)	/* in: user thread handle or NULL */
251 252 253 254 255 256 257 258 259 260 261 262 263 264 265
{
	if (error == DB_SUCCESS) {

		return(0);

  	} else if (error == (int) DB_DUPLICATE_KEY) {

    		return(HA_ERR_FOUND_DUPP_KEY);

 	} else if (error == (int) DB_RECORD_NOT_FOUND) {

    		return(HA_ERR_NO_ACTIVE_RECORD);

 	} else if (error == (int) DB_ERROR) {

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
266
    		return(-1); /* unspecified error */
267 268

 	} else if (error == (int) DB_DEADLOCK) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
269
 		/* Since we rolled back the whole transaction, we must
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
270 271 272 273 274 275
 		tell it also to MySQL so that MySQL knows to empty the
 		cached binlog for this transaction */

 		if (thd) {
 			ha_rollback(thd);
 		}
276

277 278 279 280
    		return(HA_ERR_LOCK_DEADLOCK);

 	} else if (error == (int) DB_LOCK_WAIT_TIMEOUT) {

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
281 282 283
		/* Since we rolled back the whole transaction, we must
		tell it also to MySQL so that MySQL knows to empty the
		cached binlog for this transaction */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
284

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
285 286 287
		if (thd) {
			ha_rollback(thd);
		}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
288

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
289
   		return(HA_ERR_LOCK_WAIT_TIMEOUT);
290 291 292

 	} else if (error == (int) DB_NO_REFERENCED_ROW) {

monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
293
    		return(HA_ERR_NO_REFERENCED_ROW);
294 295 296

 	} else if (error == (int) DB_ROW_IS_REFERENCED) {

monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
297
    		return(HA_ERR_ROW_IS_REFERENCED);
298

299
        } else if (error == (int) DB_CANNOT_ADD_CONSTRAINT) {
300

monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
301
    		return(HA_ERR_CANNOT_ADD_FOREIGN);
302

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
303 304
        } else if (error == (int) DB_CANNOT_DROP_CONSTRAINT) {

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
305
    		return(HA_ERR_ROW_IS_REFERENCED); /* TODO: This is a bit
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
306 307
						misleading, a new MySQL error
						code should be introduced */
308 309
        } else if (error == (int) DB_COL_APPEARS_TWICE_IN_INDEX) {

310
    		return(HA_ERR_CRASHED);
311

312 313 314 315 316 317 318 319 320 321 322 323 324 325 326
 	} else if (error == (int) DB_OUT_OF_FILE_SPACE) {

    		return(HA_ERR_RECORD_FILE_FULL);

 	} else if (error == (int) DB_TABLE_IS_BEING_USED) {

    		return(HA_ERR_WRONG_COMMAND);

 	} else if (error == (int) DB_TABLE_NOT_FOUND) {

    		return(HA_ERR_KEY_NOT_FOUND);

  	} else if (error == (int) DB_TOO_BIG_RECORD) {

    		return(HA_ERR_TO_BIG_ROW);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
327 328 329 330

  	} else if (error == (int) DB_CORRUPTION) {

    		return(HA_ERR_CRASHED);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
331 332 333
  	} else if (error == (int) DB_NO_SAVEPOINT) {

    		return(HA_ERR_NO_SAVEPOINT);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
334 335 336
  	} else if (error == (int) DB_LOCK_TABLE_FULL) {

    		return(HA_ERR_LOCK_TABLE_FULL);
337
    	} else {
338
    		return(-1);			// Unknown error
339 340 341
    	}
}

342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370
/*****************************************************************
Locks the mutex LOCK_thread_count.
If you want to print a thd that is not associated with the current thread,
you must call this function before reserving the InnoDB kernel_mutex, to
protect MySQL from setting thd->query NULL. If you print a thd of the current
thread, we know that MySQL cannot modify thd->query, and it is not necessary
to call this. Call innobase_mysql_end_print_arbitrary_thd() after you release
the kernel_mutex; it unlocks the same mutex.
NOTE that /mysql/innobase/lock/lock0lock.c must contain the prototype for this
function! */
extern "C"
void
innobase_mysql_prepare_print_arbitrary_thd(void)
/*============================================*/
{
	VOID(pthread_mutex_lock(&LOCK_thread_count));
}

/*****************************************************************
Releases the mutex reserved by innobase_mysql_prepare_print_arbitrary_thd(),
i.e. unlocks LOCK_thread_count.
NOTE that /mysql/innobase/lock/lock0lock.c must contain the prototype for this
function! */
extern "C"
void
innobase_mysql_end_print_arbitrary_thd(void)
/*========================================*/
{
	VOID(pthread_mutex_unlock(&LOCK_thread_count));
}

371 372
/*****************************************************************
Prints info of a THD object (== user session thread) to the
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
373
standard output. NOTE that /mysql/innobase/trx/trx0trx.c must contain
374
the prototype for this function! */
375
extern "C"
376 377 378
void
innobase_mysql_print_thd(
/*=====================*/
379
	FILE*   f,	/* in: output stream */
380
        void*   input_thd)/* in: pointer to a MySQL THD object */
381
{
382 383
	const THD*	thd;
	const char*	s;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
384
	char		buf[301];
385

386
        thd = (const THD*) input_thd;
387

388 389 390 391 392 393
  	fprintf(f, "MySQL thread id %lu, query id %lu",
		thd->thread_id, thd->query_id);
	if (thd->host) {
		putc(' ', f);
		fputs(thd->host, f);
	}
394

395 396 397 398
	if (thd->ip) {
		putc(' ', f);
		fputs(thd->ip, f);
	}
399

400
  	if (thd->user) {
401 402
		putc(' ', f);
		fputs(thd->user, f);
403 404
  	}

405
	if ((s = thd->proc_info)) {
406
		putc(' ', f);
407
		fputs(s, f);
408
	}
409

410
	if ((s = thd->query)) {
marko@hundin.mysql.fi's avatar
marko@hundin.mysql.fi committed
411
		/* determine the length of the query string */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
412 413 414 415 416
		uint32 i, len;
		
		len = thd->query_length;

		if (len > 300) {
417
			len = 300;	/* ADDITIONAL SAFETY: print at most
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
418
					300 chars to reduce the probability of
419
					a seg fault if there is a race in
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
420 421 422
					thd->query_length in MySQL; after
					May 14, 2004 probably no race any more,
					but better be safe */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
423
		}
424

425 426 427
                /* Use strmake to reduce the timeframe
                   for a race, compared to fwrite() */
		i= (uint) (strmake(buf, s, len) - buf);
marko@hundin.mysql.fi's avatar
marko@hundin.mysql.fi committed
428
		putc('\n', f);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
429
		fwrite(buf, 1, i, f);
430
	}
431

432
	putc('\n', f);
433 434
}

435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464
/**********************************************************************
Compares NUL-terminated UTF-8 strings case insensitively. The comparison
is done by my_strcasecmp() using system_charset_info.

NOTE that the exact prototype of this function has to be in
/innobase/dict/dict0dict.c! */
extern "C"
int
innobase_strcasecmp(
/*================*/
				/* out: 0 if a=b, <0 if a<b, >0 if a>b */
	const char*	a,	/* in: first string to compare */
	const char*	b)	/* in: second string to compare */
{
	return(my_strcasecmp(system_charset_info, a, b));
}

/**********************************************************************
Makes all characters in a NUL-terminated UTF-8 string lower case. The
conversion is done in place by my_casedn_str() using system_charset_info.

NOTE that the exact prototype of this function has to be in
/innobase/dict/dict0dict.c! */
extern "C"
void
innobase_casedn_str(
/*================*/
	char*	a)	/* in/out: string to put in lower case */
{
	my_casedn_str(system_charset_info, a);
}

465 466 467 468 469 470 471 472 473
/*************************************************************************
Creates a temporary file. */
extern "C"
int
innobase_mysql_tmpfile(void)
/*========================*/
			/* out: temporary file descriptor, or < 0 on error */
{
	char	filename[FN_REFLEN];
marko@hundin.mysql.fi's avatar
marko@hundin.mysql.fi committed
474
	int	fd2 = -1;
475
	File	fd = create_temp_file(filename, mysql_tmpdir, "ib",
476 477 478 479 480 481 482
#ifdef __WIN__
				O_BINARY | O_TRUNC | O_SEQUENTIAL |
				O_TEMPORARY | O_SHORT_LIVED |
#endif /* __WIN__ */
				O_CREAT | O_EXCL | O_RDWR,
				MYF(MY_WME));
	if (fd >= 0) {
marko@hundin.mysql.fi's avatar
marko@hundin.mysql.fi committed
483 484 485 486
#ifndef __WIN__
		/* On Windows, open files cannot be removed, but files can be
		created with the O_TEMPORARY flag to the same effect
		("delete on close"). */
487 488
		unlink(filename);
#endif /* !__WIN__ */
marko@hundin.mysql.fi's avatar
marko@hundin.mysql.fi committed
489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506
		/* Copy the file descriptor, so that the additional resources
		allocated by create_temp_file() can be freed by invoking
		my_close().

		Because the file descriptor returned by this function
		will be passed to fdopen(), it will be closed by invoking
		fclose(), which in turn will invoke close() instead of
		my_close(). */
		fd2 = dup(fd);
		if (fd2 < 0) {
			DBUG_PRINT("error",("Got error %d on dup",fd2));
			my_errno=errno;
			my_error(EE_OUT_OF_FILERESOURCES,
				MYF(ME_BELL+ME_WAITTANG), filename, my_errno);
		}
		my_close(fd, MYF(MY_WME));
	}
	return(fd2);
507 508
}

509
/*************************************************************************
510 511
Gets the InnoDB transaction handle for a MySQL handler object, creates
an InnoDB transaction struct if the corresponding MySQL thread struct still
512
lacks one. */
513
static
514 515 516
trx_t*
check_trx_exists(
/*=============*/
517
			/* out: InnoDB transaction handle */
518 519 520 521
	THD*	thd)	/* in: user thread handle */
{
	trx_t*	trx;

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
522
	ut_ad(thd == current_thd);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
523

monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
524
	trx = (trx_t*) thd->transaction.all.innobase_tid;
525 526

	if (trx == NULL) {
monty@bitch.mysql.fi's avatar
monty@bitch.mysql.fi committed
527
	        DBUG_ASSERT(thd != NULL);
528
		trx = trx_allocate_for_mysql();
529

530
		trx->mysql_thd = thd;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
531 532
		trx->mysql_query_str = &((*thd).query);
		
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
533
		thd->transaction.all.innobase_tid = trx;
534

monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
535
		/* The execution of a single SQL statement is denoted by
536
		a 'transaction' handle which is a dummy pointer: InnoDB
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
537 538
		remembers internally where the latest SQL statement
		started, and if error handling requires rolling back the
539
		latest statement, InnoDB does a rollback to a savepoint. */
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
540

541 542
		thd->transaction.stmt.innobase_tid =
		                  (void*)&innodb_dummy_stmt_trx_handle;
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
543
	} else {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560
		if (trx->magic_n != TRX_MAGIC_N) {
			mem_analyze_corruption((byte*)trx);

			ut_a(0);
		}
	}

	if (thd->options & OPTION_NO_FOREIGN_KEY_CHECKS) {
		trx->check_foreigns = FALSE;
	} else {
		trx->check_foreigns = TRUE;
	}

	if (thd->options & OPTION_RELAXED_UNIQUE_CHECKS) {
		trx->check_unique_secondary = FALSE;
	} else {
		trx->check_unique_secondary = TRUE;
561 562 563 564 565 566
	}

	return(trx);
}

/*************************************************************************
567
Updates the user_thd field in a handle and also allocates a new InnoDB
568 569
transaction handle if needed, and updates the transaction fields in the
prebuilt struct. */
570
inline
571 572 573 574 575 576
int
ha_innobase::update_thd(
/*====================*/
			/* out: 0 or error code */
	THD*	thd)	/* in: thd to use the handle */
{
577 578
	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;
	trx_t*		trx;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
579
	
580 581
	trx = check_trx_exists(thd);

582
	if (prebuilt->trx != trx) {
583

584
		row_update_prebuilt_trx(prebuilt, trx);
585 586 587
	}

	user_thd = thd;
588

589 590 591
	return(0);
}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
592 593 594 595 596 597 598 599 600 601

/*   BACKGROUND INFO: HOW THE MYSQL QUERY CACHE WORKS WITH INNODB
     ------------------------------------------------------------

1) The use of the query cache for TBL is disabled when there is an
uncommitted change to TBL.

2) When a change to TBL commits, InnoDB stores the current value of
its global trx id counter, let us denote it by INV_TRX_ID, to the table object
in the InnoDB data dictionary, and does only allow such transactions whose
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
602
id <= INV_TRX_ID to use the query cache.
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647

3) When InnoDB does an INSERT/DELETE/UPDATE to a table TBL, or an implicit
modification because an ON DELETE CASCADE, we invalidate the MySQL query cache
of TBL immediately.

How this is implemented inside InnoDB:

1) Since every modification always sets an IX type table lock on the InnoDB
table, it is easy to check if there can be uncommitted modifications for a
table: just check if there are locks in the lock list of the table.

2) When a transaction inside InnoDB commits, it reads the global trx id
counter and stores the value INV_TRX_ID to the tables on which it had a lock.

3) If there is an implicit table change from ON DELETE CASCADE or SET NULL,
InnoDB calls an invalidate method for the MySQL query cache for that table.

How this is implemented inside sql_cache.cc:

1) The query cache for an InnoDB table TBL is invalidated immediately at an
INSERT/UPDATE/DELETE, just like in the case of MyISAM. No need to delay
invalidation to the transaction commit.

2) To store or retrieve a value from the query cache of an InnoDB table TBL,
any query must first ask InnoDB's permission. We must pass the thd as a
parameter because InnoDB will look at the trx id, if any, associated with
that thd.

3) Use of the query cache for InnoDB tables is now allowed also when
AUTOCOMMIT==0 or we are inside BEGIN ... COMMIT. Thus transactions no longer
put restrictions on the use of the query cache.
*/

/**********************************************************************
The MySQL query cache uses this to check from InnoDB if the query cache at
the moment is allowed to operate on an InnoDB table. The SQL query must
be a non-locking SELECT.

The query cache is allowed to operate on certain query only if this function
returns TRUE for all tables in the query.

If thd is not in the autocommit state, this function also starts a new
transaction for thd if there is no active trx yet, and assigns a consistent
read view to it if there is no read view yet. */

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
648
my_bool
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671
innobase_query_caching_of_table_permitted(
/*======================================*/
				/* out: TRUE if permitted, FALSE if not;
				note that the value FALSE does not mean
				we should invalidate the query cache:
				invalidation is called explicitly */
	THD*	thd,		/* in: thd of the user who is trying to
				store a result to the query cache or
				retrieve it */
	char*	full_name,	/* in: concatenation of database name,
				the null character '\0', and the table
				name */
	uint	full_name_len)	/* in: length of the full name, i.e.
				len(dbname) + len(tablename) + 1 */
{
	ibool	is_autocommit;
	trx_t*	trx;
	char	norm_name[1000];

	ut_a(full_name_len < 999);

	if (thd->variables.tx_isolation == ISO_SERIALIZABLE) {
		/* In the SERIALIZABLE mode we add LOCK IN SHARE MODE to every
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
672
		plain SELECT if AUTOCOMMIT is not on. */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692
	
		return((my_bool)FALSE);
	}

	trx = (trx_t*) thd->transaction.all.innobase_tid;

	if (trx == NULL) {
		trx = check_trx_exists(thd);
	}

	innobase_release_stat_resources(trx);

	if (!(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) {

		is_autocommit = TRUE;
	} else {
		is_autocommit = FALSE;

	}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710
	if (is_autocommit && trx->n_mysql_tables_in_use == 0) {
		/* We are going to retrieve the query result from the query
		cache. This cannot be a store operation to the query cache
		because then MySQL would have locks on tables already.

		TODO: if the user has used LOCK TABLES to lock the table,
		then we open a transaction in the call of row_.. below.
		That trx can stay open until UNLOCK TABLES. The same problem
		exists even if we do not use the query cache. MySQL should be
		modified so that it ALWAYS calls some cleanup function when
		the processing of a query ends!

		We can imagine we instantaneously serialize this consistent
		read trx to the current trx id counter. If trx2 would have
		changed the tables of a query result stored in the cache, and
		trx2 would have already committed, making the result obsolete,
		then trx2 would have already invalidated the cache. Thus we
		can trust the result in the cache is ok for this query. */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
711 712 713 714 715 716 717 718 719 720 721 722

		return((my_bool)TRUE);
	}
	
	/* Normalize the table name to InnoDB format */

	memcpy(norm_name, full_name, full_name_len);

	norm_name[strlen(norm_name)] = '/'; /* InnoDB uses '/' as the
					    separator between db and table */
	norm_name[full_name_len] = '\0';
#ifdef __WIN__
723
	innobase_casedn_str(norm_name);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
724
#endif
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
725 726 727 728 729
	/* The call of row_search_.. will start a new transaction if it is
	not yet started */

	thd->transaction.all.innodb_active_trans = 1;

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
730 731
	if (row_search_check_if_query_cache_permitted(trx, norm_name)) {

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
732
		/* printf("Query cache for %s permitted\n", norm_name); */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
733 734 735 736

		return((my_bool)TRUE);
	}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
737
	/* printf("Query cache for %s NOT permitted\n", norm_name); */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
738 739 740 741 742 743 744 745

	return((my_bool)FALSE);
}

/*********************************************************************
Invalidates the MySQL query cache for the table.
NOTE that the exact prototype of this function has to be in
/innobase/row/row0ins.c! */
746
extern "C"
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
747 748 749 750 751
void
innobase_invalidate_query_cache(
/*============================*/
	trx_t*	trx,		/* in: transaction which modifies the table */
	char*	full_name,	/* in: concatenation of database name, null
752 753 754 755 756
				char '\0', table name, null char'\0';
				NOTE that in Windows this is always
				in LOWER CASE! */
	ulint	full_name_len)	/* in: full name length where also the null
				chars count */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
757 758
{
	/* Argument TRUE below means we are using transactions */
759
#ifdef HAVE_QUERY_CACHE
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
760 761 762 763
	query_cache.invalidate((THD*)(trx->mysql_thd),
					(const char*)full_name,
					(uint32)full_name_len,
					TRUE);
764
#endif
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
765
}
766 767

/*********************************************************************
768 769
Get the quote character to be used in SQL identifiers.
This definition must match the one in innobase/ut/ut0ut.c! */
770
extern "C"
771 772 773
int
mysql_get_identifier_quote_char(
/*============================*/
774
				/* out: quote character to be
775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794
				used in SQL identifiers; EOF if none */
	trx_t*		trx,	/* in: transaction */
	const char*	name,	/* in: name to print */
	ulint		namelen)/* in: length of name */
{
	if (!trx || !trx->mysql_thd) {
		return(EOF);
	}
	return(get_quote_char_for_identifier((THD*) trx->mysql_thd,
						name, namelen));
}

/**************************************************************************
Obtain a pointer to the MySQL THD object, as in current_thd().  This
definition must match the one in sql/ha_innodb.cc! */
extern "C"
void*
innobase_current_thd(void)
/*======================*/
			/* out: MySQL THD object */
795
{
796
	return(current_thd);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
797 798
}

799 800 801 802
/*********************************************************************
Call this when you have opened a new table handle in HANDLER, before you
call index_read_idx() etc. Actually, we can let the cursor stay open even
over a transaction commit! Then you should call this before every operation,
803
fetch next etc. This function inits the necessary things even after a
804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823
transaction commit. */

void
ha_innobase::init_table_handle_for_HANDLER(void)
/*============================================*/
{
        row_prebuilt_t* prebuilt;

        /* If current thd does not yet have a trx struct, create one.
        If the current handle does not yet have a prebuilt struct, create
        one. Update the trx pointers in the prebuilt struct. Normally
        this operation is done in external_lock. */

        update_thd(current_thd);

        /* Initialize the prebuilt struct much like it would be inited in
        external_lock */

        prebuilt = (row_prebuilt_t*)innobase_prebuilt;

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
824 825
	innobase_release_stat_resources(prebuilt->trx);

826 827 828 829 830 831 832 833
        /* If the transaction is not started yet, start it */

        trx_start_if_not_started_noninline(prebuilt->trx);

        /* Assign a read view if the transaction does not have it yet */

        trx_assign_read_view(prebuilt->trx);

834 835 836 837
	/* Set the MySQL flag to mark that there is an active transaction */

	current_thd->transaction.all.innodb_active_trans = 1;

838 839 840 841 842 843 844 845 846
        /* We did the necessary inits in this function, no need to repeat them
        in row_search_for_mysql */

        prebuilt->sql_stat_start = FALSE;

        /* We let HANDLER always to do the reads as consistent reads, even
        if the trx isolation level would have been specified as SERIALIZABLE */

        prebuilt->select_lock_type = LOCK_NONE;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
847
        prebuilt->stored_select_lock_type = LOCK_NONE;
848 849 850

        /* Always fetch all columns in the index record */

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
851
        prebuilt->hint_need_to_fetch_extra_cols = ROW_RETRIEVE_ALL_COLS;
852 853 854 855 856

        /* We want always to fetch all columns in the whole row? Or do
	we???? */

        prebuilt->read_just_key = FALSE;
857 858

	prebuilt->used_in_HANDLER = TRUE;
859 860
}

861
/*************************************************************************
862
Opens an InnoDB database. */
863

864
bool
865 866
innobase_init(void)
/*===============*/
867
			/* out: TRUE if error */
868
{
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
869
	static char	current_dir[3];		/* Set if using current lib */
870 871
	int		err;
	bool		ret;
872
	char 	        *default_path;
monty@hundin.mysql.fi's avatar
merge  
monty@hundin.mysql.fi committed
873

874 875
  	DBUG_ENTER("innobase_init");

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
876
  	os_innodb_umask = (ulint)my_umask;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
877

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
878 879 880 881 882 883
	/* First calculate the default path for innodb_data_home_dir etc.,
	in case the user has not given any value.

	Note that when using the embedded server, the datadirectory is not
	necessarily the current directory of this program. */

884
	if (mysqld_embedded) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
885
		default_path = mysql_real_data_home;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
886
		fil_path_to_mysql_datadir = mysql_real_data_home;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
887 888 889 890 891 892
	} else {
	  	/* It's better to use current lib, to keep paths short */
	  	current_dir[0] = FN_CURLIB;
	  	current_dir[1] = FN_LIBCHAR;
	  	current_dir[2] = 0;
	  	default_path = current_dir;
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
893 894
	}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
895 896
	ut_a(default_path);

897 898 899 900 901 902
	if (specialflag & SPECIAL_NO_PRIOR) {
	        srv_set_thread_priorities = FALSE;
	} else {
	        srv_set_thread_priorities = TRUE;
	        srv_query_thread_priority = QUERY_PRIOR;
	}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
903

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
904 905
	/* Set InnoDB initialization parameters according to the values
	read from MySQL .cnf file */
906

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
907
	/*--------------- Data files -------------------------*/
908

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
909
	/* The default dir for data files is the datadir of MySQL */
910 911

	srv_data_home = (innobase_data_home_dir ? innobase_data_home_dir :
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
912
			 default_path);
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
913

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929
	/* Set default InnoDB data file size to 10 MB and let it be
  	auto-extending. Thus users can use InnoDB in >= 4.0 without having
	to specify any startup options. */

	if (!innobase_data_file_path) {
  		innobase_data_file_path = (char*) "ibdata1:10M:autoextend";
	}

	/* Since InnoDB edits the argument in the next call, we make another
	copy of it: */

	internal_innobase_data_file_path = my_strdup(innobase_data_file_path,
						   MYF(MY_WME));

	ret = (bool) srv_parse_data_file_paths_and_sizes(
				internal_innobase_data_file_path,
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
930 931 932 933 934 935
				&srv_data_file_names,
				&srv_data_file_sizes,
				&srv_data_file_is_raw_partition,
				&srv_n_data_files,
				&srv_auto_extend_last_data_file,
				&srv_last_file_size_max);
936
	if (ret == FALSE) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
937 938 939
	  	sql_print_error(
			"InnoDB: syntax error in innodb_data_file_path");
	  	DBUG_RETURN(TRUE);
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
940
	}
941

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
942 943 944 945 946 947 948
	/* -------------- Log files ---------------------------*/

	/* The default dir for log files is the datadir of MySQL */
	
	if (!innobase_log_group_home_dir) {
	  	innobase_log_group_home_dir = default_path;
	}
marko@hundin.mysql.fi's avatar
marko@hundin.mysql.fi committed
949 950

#ifdef UNIV_LOG_ARCHIVE	  	
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
951 952 953 954 955 956 957
	/* Since innodb_log_arch_dir has no relevance under MySQL,
	starting from 4.0.6 we always set it the same as
	innodb_log_group_home_dir: */

	innobase_log_arch_dir = innobase_log_group_home_dir;

	srv_arch_dir = innobase_log_arch_dir;
marko@hundin.mysql.fi's avatar
marko@hundin.mysql.fi committed
958
#endif /* UNIG_LOG_ARCHIVE */
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
959

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
960 961 962
	ret = (bool)
		srv_parse_log_group_home_dirs(innobase_log_group_home_dir,
						&srv_log_group_home_dirs);
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
963

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
964 965 966 967
	if (ret == FALSE || innobase_mirrored_log_groups != 1) {
		fprintf(stderr,
		"InnoDB: syntax error in innodb_log_group_home_dir\n"
		"InnoDB: or a wrong number of mirrored log groups\n");
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
968

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
969
		DBUG_RETURN(TRUE);
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
970
	}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
971

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
972 973 974
	/* --------------------------------------------------*/

	srv_file_flush_method_str = innobase_unix_file_flush_method;
975

monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
976
	srv_n_log_groups = (ulint) innobase_mirrored_log_groups;
977
	srv_n_log_files = (ulint) innobase_log_files_in_group;
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
978 979
	srv_log_file_size = (ulint) innobase_log_file_size;

marko@hundin.mysql.fi's avatar
marko@hundin.mysql.fi committed
980
#ifdef UNIV_LOG_ARCHIVE
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
981
	srv_log_archive_on = (ulint) innobase_log_archive;
marko@hundin.mysql.fi's avatar
marko@hundin.mysql.fi committed
982
#endif /* UNIV_LOG_ARCHIVE */
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
983
	srv_log_buffer_size = (ulint) innobase_log_buffer_size;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
984
	srv_flush_log_at_trx_commit = (ulint) innobase_flush_log_at_trx_commit;
985

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
986 987
        /* We set srv_pool_size here in units of 1 kB. InnoDB internally
        changes the value so that it becomes the number of database pages. */
988 989

        if (innobase_buffer_pool_awe_mem_mb == 0) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004
                /* Careful here: we first convert the signed long int to ulint
                and only after that divide */
 
                srv_pool_size = ((ulint) innobase_buffer_pool_size) / 1024;
        } else {
                srv_use_awe = TRUE;
                srv_pool_size = (ulint)
                                (1024 * innobase_buffer_pool_awe_mem_mb);
                srv_awe_window_size = (ulint) innobase_buffer_pool_size;
 
                /* Note that what the user specified as
                innodb_buffer_pool_size is actually the AWE memory window
                size in this case, and the real buffer pool size is
                determined by .._awe_mem_mb. */
        }
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1005

monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
1006 1007 1008
	srv_mem_pool_size = (ulint) innobase_additional_mem_pool_size;

	srv_n_file_io_threads = (ulint) innobase_file_io_threads;
1009

1010
	srv_lock_wait_timeout = (ulint) innobase_lock_wait_timeout;
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
1011 1012 1013 1014
	srv_thread_concurrency = (ulint) innobase_thread_concurrency;
	srv_force_recovery = (ulint) innobase_force_recovery;

	srv_fast_shutdown = (ibool) innobase_fast_shutdown;
1015

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1016
	srv_file_per_table = (ibool) innobase_file_per_table;
1017
        srv_locks_unsafe_for_binlog = (ibool) innobase_locks_unsafe_for_binlog;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1018 1019

	srv_max_n_open_files = (ulint) innobase_open_files;
1020
	srv_innodb_status = (ibool) innobase_create_status_file;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1021

1022
	srv_print_verbose_log = mysqld_embedded ? 0 : 1;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1023

1024 1025 1026 1027 1028 1029 1030 1031
		/* Store the default charset-collation number of this MySQL
	installation */

	data_mysql_default_charset_coll = (ulint)default_charset_info->number;

	data_mysql_latin1_swedish_charset_coll =
					(ulint)my_charset_latin1.number;

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1032 1033 1034 1035
	/* Store the latin1_swedish_ci character ordering table to InnoDB. For
	non-latin1_swedish_ci charsets we use the MySQL comparison functions,
	and consequently we do not need to know the ordering internally in
	InnoDB. */
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
1036

1037
	ut_a(0 == strcmp((char*)my_charset_latin1.name,
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1038 1039
						(char*)"latin1_swedish_ci"));
	memcpy(srv_latin1_ordering, my_charset_latin1.sort_order, 256);
1040

1041 1042 1043 1044 1045 1046 1047 1048
	/* Since we in this module access directly the fields of a trx
        struct, and due to different headers and flags it might happen that
	mutex_t has a different size in this module and in InnoDB
	modules, we check at run time that the size is the same in
	these compilation modules. */

	srv_sizeof_trx_t_in_ha_innodb_cc = sizeof(trx_t);

monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
1049
	err = innobase_start_or_create_for_mysql();
1050 1051 1052

	if (err != DB_SUCCESS) {

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1053
		DBUG_RETURN(1);
1054
	}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1055 1056 1057 1058

	(void) hash_init(&innobase_open_tables,system_charset_info, 32, 0, 0,
			 		(hash_get_key) innobase_get_key, 0, 0);
	pthread_mutex_init(&innobase_mutex, MY_MUTEX_INIT_FAST);
1059
	innodb_inited= 1;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075

	/* If this is a replication slave and we needed to do a crash recovery,
	set the master binlog position to what InnoDB internally knew about
	how far we got transactions durable inside InnoDB. There is a
	problem here: if the user used also MyISAM tables, InnoDB might not
	know the right position for them.

	THIS DOES NOT WORK CURRENTLY because replication seems to initialize
	glob_mi also after innobase_init. */
	
/*	if (trx_sys_mysql_master_log_pos != -1) {
		ut_memcpy(glob_mi.log_file_name, trx_sys_mysql_master_log_name,
				1 + ut_strlen(trx_sys_mysql_master_log_name));
		glob_mi.pos = trx_sys_mysql_master_log_pos;
	}
*/
1076
  	DBUG_RETURN(0);
1077 1078 1079
}

/***********************************************************************
1080
Closes an InnoDB database. */
1081

1082
bool
1083 1084
innobase_end(void)
/*==============*/
1085
				/* out: TRUE if error */
1086
{
1087
	int	err= 0;
1088 1089 1090

	DBUG_ENTER("innobase_end");

1091 1092 1093 1094 1095
#ifdef __NETWARE__ 	/* some special cleanup for NetWare */
	if (nw_panic) {
		set_panic_flag_for_netware();
	}
#endif
1096 1097
	if (innodb_inited)
	{
1098 1099 1100 1101 1102 1103 1104 1105 1106
	  if (innobase_very_fast_shutdown) {
	    srv_very_fast_shutdown = TRUE;
	    fprintf(stderr,
"InnoDB: MySQL has requested a very fast shutdown without flushing\n"
"InnoDB: the InnoDB buffer pool to data files. At the next mysqld startup\n"
"InnoDB: InnoDB will do a crash recovery!\n");

	  }

1107 1108 1109 1110 1111 1112
	  innodb_inited= 0;
	  if (innobase_shutdown_for_mysql() != DB_SUCCESS)
	    err= 1;
	  hash_free(&innobase_open_tables);
	  my_free(internal_innobase_data_file_path,MYF(MY_ALLOW_ZERO_PTR));
	  pthread_mutex_destroy(&innobase_mutex);
1113
	}
1114

1115
  	DBUG_RETURN(err);
1116 1117 1118
}

/********************************************************************
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1119 1120
Flushes InnoDB logs to disk and makes a checkpoint. Really, a commit flushes
the logs, and the name of this function should be innobase_checkpoint. */
1121

1122
bool
1123 1124
innobase_flush_logs(void)
/*=====================*/
1125
				/* out: TRUE if error */
1126
{
1127
  	bool 	result = 0;
1128 1129 1130

  	DBUG_ENTER("innobase_flush_logs");

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1131
	log_buffer_flush_to_disk();
1132

1133 1134 1135 1136
  	DBUG_RETURN(result);
}

/*********************************************************************
1137
Commits a transaction in an InnoDB database. */
1138

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1139 1140 1141 1142 1143
void
innobase_commit_low(
/*================*/
	trx_t*	trx)	/* in: transaction handle */
{
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1144
        if (trx->conc_state == TRX_NOT_STARTED) {
1145

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1146 1147
                return;
        }
1148

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1149
#ifdef HAVE_REPLICATION
1150 1151
        if (current_thd->slave_thread) {
                /* Update the replication position info inside InnoDB */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1152

1153
                trx->mysql_master_log_file_name
1154
                                        = active_mi->rli.group_master_log_name;
monty@mysql.com's avatar
monty@mysql.com committed
1155 1156
                trx->mysql_master_log_pos= ((ib_longlong)
                   			    active_mi->rli.future_group_master_log_pos);
1157
        }
hf@deer.mysql.r18.ru's avatar
SCRUM  
hf@deer.mysql.r18.ru committed
1158
#endif /* HAVE_REPLICATION */
1159

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1160
	trx_commit_for_mysql(trx);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1161 1162
}

1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204
/*********************************************************************
Creates an InnoDB transaction struct for the thd if it does not yet have one.
Starts a new InnoDB transaction if a transaction is not yet started. And
assigns a new snapshot for a consistent read if the transaction does not yet
have one. */

int
innobase_start_trx_and_assign_read_view(
/*====================================*/
			/* out: 0 */
	THD*	thd)	/* in: MySQL thread handle of the user for whom
			the transaction should be started */
{
	trx_t*	trx;

  	DBUG_ENTER("innobase_start_trx_and_assign_read_view");

	/* Create a new trx struct for thd, if it does not yet have one */

	trx = check_trx_exists(thd);

	/* This is just to play safe: release a possible FIFO ticket and
	search latch. Since we will reserve the kernel mutex, we have to
	release the search system latch first to obey the latching order. */

	innobase_release_stat_resources(trx);

	/* If the transaction is not started yet, start it */

	trx_start_if_not_started_noninline(trx);

	/* Assign a read view if the transaction does not have it yet */

	trx_assign_read_view(trx);

	/* Set the MySQL flag to mark that there is an active transaction.
	The flag is set on the same thd whose trx was started above, like
	the other transaction functions in this file do. */

	thd->transaction.all.innodb_active_trans = 1;

	DBUG_RETURN(0);
}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1205
/*********************************************************************
1206 1207
Commits a transaction in an InnoDB database or marks an SQL statement
ended. */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1208

1209 1210 1211
int
innobase_commit(
/*============*/
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1212
			/* out: 0 */
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
1213
	THD*	thd,	/* in: MySQL thread handle of the user for whom
1214
			the transaction should be committed */
1215 1216
	void*	trx_handle)/* in: InnoDB trx handle or
			&innodb_dummy_stmt_trx_handle: the latter means
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1217
			that the current SQL statement ended */
1218
{
1219
	trx_t*		trx;
1220 1221 1222 1223

  	DBUG_ENTER("innobase_commit");
  	DBUG_PRINT("trans", ("ending transaction"));

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1224
	trx = check_trx_exists(thd);
1225

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1226 1227 1228
	/* Release a possible FIFO ticket and search latch. Since we will
	reserve the kernel mutex, we have to release the search system latch
	first to obey the latching order. */
1229

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1230
	innobase_release_stat_resources(trx);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1231

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1232
	/* The flag thd->transaction.all.innodb_active_trans is set to 1 in
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1233 1234 1235

	1. ::external_lock(),
	2. ::start_stmt(),
1236
	3. innobase_query_caching_of_table_permitted(),
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1237
	4. innobase_savepoint(),
1238 1239
	5. ::init_table_handle_for_HANDLER(),
	6. innobase_start_trx_and_assign_read_view()
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1240 1241 1242 1243 1244

	and it is only set to 0 in a commit or a rollback. If it is 0 we know
	there cannot be resources to be freed and we could return immediately.
	For the time being, we play safe and do the cleanup though there should
	be nothing to clean up. */
1245

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1246 1247 1248 1249 1250 1251
	if (thd->transaction.all.innodb_active_trans == 0
	    && trx->conc_state != TRX_NOT_STARTED) {
	    
	        fprintf(stderr,
"InnoDB: Error: thd->transaction.all.innodb_active_trans == 0\n"
"InnoDB: but trx->conc_state != TRX_NOT_STARTED\n");
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1252
	}
1253 1254 1255 1256

	if (trx_handle != (void*)&innodb_dummy_stmt_trx_handle
	    || (!(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)))) {
	        
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1257 1258 1259
		/* We were instructed to commit the whole transaction, or
		this is an SQL statement end and autocommit is on */

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1260
		innobase_commit_low(trx);
1261 1262 1263

		thd->transaction.all.innodb_active_trans = 0;
	} else {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1264 1265 1266
	        /* We just mark the SQL statement ended and do not do a
		transaction commit */

1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277
		if (trx->auto_inc_lock) {
			/* If we had reserved the auto-inc lock for some
			table in this SQL statement we release it now */
		  	
			row_unlock_table_autoinc_for_mysql(trx);
		}
		/* Store the current undo_no of the transaction so that we
		know where to roll back if we have to roll back the next
		SQL statement */

		trx_mark_sql_stat_end(trx);
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
1278
	}
1279

1280 1281
	/* Tell the InnoDB server that there might be work for utility
	threads: */
1282 1283 1284

	srv_active_wake_master_thread();

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1285
	DBUG_RETURN(0);
1286 1287
}

1288 1289 1290 1291
/*********************************************************************
This is called when MySQL writes the binlog entry for the current
transaction. Writes to the InnoDB tablespace info which tells where the
MySQL binlog entry for the current transaction ended. Also commits the
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1292
transaction inside InnoDB but does NOT flush InnoDB log files to disk.
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1293
To flush you have to call innobase_commit_complete(). We have separated
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1294 1295
flushing to eliminate the bottleneck of LOCK_log in log.cc which disabled
InnoDB's group commit capability. */
1296 1297 1298 1299

int
innobase_report_binlog_offset_and_commit(
/*=====================================*/
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1300
                                /* out: 0 */
1301
        THD*    thd,            /* in: user thread */
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
1302
        void*   trx_handle,     /* in: InnoDB trx handle */
1303 1304
        char*   log_file_name,  /* in: latest binlog file name */
        my_off_t end_offset)    /* in: the offset in the binlog file
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
1305
                                   up to which we wrote */
1306
{
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
1307 1308 1309
	trx_t*	trx;

	trx = (trx_t*)trx_handle;
1310

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1311 1312
	ut_a(trx != NULL);

heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
1313 1314 1315
	trx->mysql_log_file_name = log_file_name;  	
	trx->mysql_log_offset = (ib_longlong)end_offset;
	
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348
	trx->flush_log_later = TRUE;

  	innobase_commit(thd, trx_handle);

	trx->flush_log_later = FALSE;

	return(0);
}

/*********************************************************************
This is called after MySQL has written the binlog entry for the current
transaction. Flushes the InnoDB log files to disk if required. */

int
innobase_commit_complete(
/*=====================*/
                                /* out: 0 */
        void*   trx_handle)     /* in: InnoDB trx handle */
{
	if (srv_flush_log_at_trx_commit != 0) {
		trx_t*	trx = (trx_t*) trx_handle;

		ut_a(trx != NULL);

		trx_commit_complete_for_mysql(trx);
	}

	/* When srv_flush_log_at_trx_commit is 0 nothing is done here */

	return(0);
}

1351
/*********************************************************************
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1352
Rolls back a transaction or the latest SQL statement. */
1353 1354 1355 1356 1357

int
innobase_rollback(
/*==============*/
			/* out: 0 or error number */
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
1358
	THD*	thd,	/* in: handle to the MySQL thread of the user
1359
			whose transaction should be rolled back */
1360 1361 1362
	void*	trx_handle)/* in: InnoDB trx handle or a dummy stmt handle;
			the latter means we roll back the latest SQL
			statement */
1363 1364
{
	int	error = 0;
1365
	trx_t*	trx;
1366

1367 1368 1369
	DBUG_ENTER("innobase_rollback");
	DBUG_PRINT("trans", ("aborting transaction"));

1370
	trx = check_trx_exists(thd);
1371

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1372 1373 1374 1375 1376 1377
	/* Release a possible FIFO ticket and search latch. Since we will
	reserve the kernel mutex, we have to release the search system latch
	first to obey the latching order. */

	innobase_release_stat_resources(trx);

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1378
        if (trx->auto_inc_lock) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1379 1380 1381 1382
		/* If we had reserved the auto-inc lock for some table (if
		we come here to roll back the latest SQL statement) we
		release it now before a possibly lengthy rollback */
		
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1383 1384 1385
		row_unlock_table_autoinc_for_mysql(trx);
	}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1386 1387
	if (trx_handle != (void*)&innodb_dummy_stmt_trx_handle
	    || (!(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)))) {
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
1388

1389
		error = trx_rollback_for_mysql(trx);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1390
		thd->transaction.all.innodb_active_trans = 0;
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
1391
	} else {
1392
		error = trx_rollback_last_sql_stat_for_mysql(trx);
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
1393
	}
1394

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420
	DBUG_RETURN(convert_error_code_to_mysql(error, NULL));
}

/*********************************************************************
Rolls back a transaction to a savepoint. */

int
innobase_rollback_to_savepoint(
/*===========================*/
				/* out: 0 if success, HA_ERR_NO_SAVEPOINT if
				no savepoint with the given name */
	THD*	thd,		/* in: handle to the MySQL thread of the user
				whose transaction should be rolled back */
	char*	savepoint_name,	/* in: savepoint name */
	my_off_t* binlog_cache_pos)/* out: position which corresponds to the
				savepoint in the binlog cache of this
				transaction, not defined if error */
{
	/* Initialized so that the copy to *binlog_cache_pos below never
	reads an indeterminate value, even if the rollback call fails
	without storing a position */
	ib_longlong mysql_binlog_cache_pos = 0;
	int	    error = 0;
	trx_t*	    trx;

	DBUG_ENTER("innobase_rollback_to_savepoint");

	trx = check_trx_exists(thd);

	/* Release a possible FIFO ticket and search latch. Since we will
	reserve the kernel mutex, we have to release the search system latch
	first to obey the latching order. */

	innobase_release_stat_resources(trx);

	error = trx_rollback_to_savepoint_for_mysql(trx, savepoint_name,
						&mysql_binlog_cache_pos);
	*binlog_cache_pos = (my_off_t)mysql_binlog_cache_pos;

	DBUG_RETURN(convert_error_code_to_mysql(error, NULL));
}

/*********************************************************************
Sets a transaction savepoint. */

int
innobase_savepoint(
/*===============*/
				/* out: 0 in the autocommit state; otherwise
				the savepoint call's error code converted to
				a MySQL error code */
	THD*	thd,		/* in: handle to the MySQL thread */
	char*	savepoint_name,	/* in: savepoint name */
	my_off_t binlog_cache_pos)/* in: offset up to which the current
				transaction has cached log entries to its
				binlog cache, not defined if no transaction
				active, or we are in the autocommit state, or
				binlogging is not switched on */
{
	DBUG_ENTER("innobase_savepoint");

	if (thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {
		trx_t*	trx = check_trx_exists(thd);
		int	error;

		/* Release a possible FIFO ticket and search latch. Since we
		will reserve the kernel mutex, we have to release the search
		system latch first to obey the latching order. */

		innobase_release_stat_resources(trx);

		/* Setting a savepoint starts a transaction inside InnoDB
		since it allocates resources for it (memory to store the
		savepoint name, for example) */

		thd->transaction.all.innodb_active_trans = 1;

		error = trx_savepoint_for_mysql(trx, savepoint_name,
					(ib_longlong) binlog_cache_pos);

		DBUG_RETURN(convert_error_code_to_mysql(error, NULL));
	}

	/* In the autocommit state there is no sense to set a savepoint:
	we return immediate success */

	DBUG_RETURN(0);
}

1480
/*********************************************************************
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1481
Frees a possible InnoDB trx object associated with the current THD. */
1482 1483 1484 1485

int
innobase_close_connection(
/*======================*/
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1486 1487 1488
			/* out: 0 or error number */
	THD*	thd)	/* in: handle to the MySQL thread of the user
			whose transaction should be rolled back */
1489
{
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1490
	trx_t*	trx;
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
1491

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1492
	trx = (trx_t*)thd->transaction.all.innobase_tid;
1493

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1494 1495
	if (NULL != trx) {
	        innobase_rollback(thd, (void*)trx);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1496

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1497
		trx_free_for_mysql(trx);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1498

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1499 1500
		thd->transaction.all.innobase_tid = NULL;
	}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1501

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1502
	return(0);
1503
}
1504 1505 1506


/*****************************************************************************
1507
** InnoDB database tables
1508 1509 1510
*****************************************************************************/

/********************************************************************
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1511
Gives the file extension of an InnoDB single-table tablespace. */
1512 1513 1514 1515

const char**
ha_innobase::bas_ext() const
/*========================*/
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1516
				/* out: file extension string */
1517
{
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1518
	static const char* ext[] = {".ibd", NullS};
1519

1520 1521 1522
	return(ext);
}

1523 1524 1525
/*********************************************************************
Normalizes a table name string. A normalized name consists of the
database name catenated to '/' and table name. An example:
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
1526 1527
test/mytable. On Windows normalization puts both the database name and the
table name always to lower case. */
1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541
static
void
normalize_table_name(
/*=================*/
	char*		norm_name,	/* out: normalized name as a
					null-terminated string */
	const char*	name)		/* in: table name string */
{
	char*	name_ptr;
	char*	db_ptr;
	char*	ptr;

	/* Scan name from the end */

1542
	ptr = strend(name)-1;
1543 1544 1545 1546 1547 1548 1549

	while (ptr >= name && *ptr != '\\' && *ptr != '/') {
		ptr--;
	}

	name_ptr = ptr + 1;

monty@bitch.mysql.fi's avatar
monty@bitch.mysql.fi committed
1550
	DBUG_ASSERT(ptr > name);
1551 1552

	ptr--;
1553

1554 1555 1556 1557 1558 1559 1560 1561 1562
	while (ptr >= name && *ptr != '\\' && *ptr != '/') {
		ptr--;
	}

	db_ptr = ptr + 1;

	memcpy(norm_name, db_ptr, strlen(name) + 1 - (db_ptr - name));

	norm_name[name_ptr - db_ptr - 1] = '/';
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
1563 1564

#ifdef __WIN__
marko@hundin.mysql.fi's avatar
marko@hundin.mysql.fi committed
1565
	innobase_casedn_str(norm_name);
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
1566
#endif
1567
}
1568

1569
/*********************************************************************
1570
Creates and opens a handle to a table which already exists in an InnoDB
1571 1572 1573 1574 1575 1576 1577 1578
database. */

int
ha_innobase::open(
/*==============*/
					/* out: 1 if error, 0 if success */
	const char*	name,		/* in: table name */
	int 		mode,		/* in: not used */
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
1579
	uint 		test_if_locked)	/* in: not used */
1580
{
1581 1582
	dict_table_t*	ib_table;
  	char		norm_name[1000];
1583
	THD*		thd;
1584 1585 1586 1587 1588 1589

	DBUG_ENTER("ha_innobase::open");

	UT_NOT_USED(mode);
	UT_NOT_USED(test_if_locked);

1590
	thd = current_thd;
1591 1592
	normalize_table_name(norm_name, name);

1593 1594
	user_thd = NULL;

1595 1596
	last_query_id = (ulong)-1;

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1597 1598 1599 1600
	if (!(share=get_share(name))) {

		DBUG_RETURN(1);
	}
1601

1602 1603 1604 1605
	/* Create buffers for packing the fields of a record. Why
	table->reclength did not work here? Obviously, because char
	fields when packed actually became 1 byte longer, when we also
	stored the string length as the first byte. */
1606

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1607
	upd_and_key_val_buff_len = table->reclength + table->max_key_length
1608
							+ MAX_REF_PARTS * 3;
1609
	if (!(mysql_byte*) my_multi_malloc(MYF(MY_WME),
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1610 1611
				     &upd_buff, upd_and_key_val_buff_len,
				     &key_val_buff, upd_and_key_val_buff_len,
1612
				     NullS)) {
1613
	  	free_share(share);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1614

1615
	  	DBUG_RETURN(1);
1616 1617
  	}

1618
	/* Get pointer to a table object in InnoDB dictionary cache */
1619

1620 1621 1622
	ib_table = dict_table_get_and_increment_handle_count(
				      		     norm_name, NULL);
 	if (NULL == ib_table) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1623 1624 1625 1626 1627 1628 1629 1630 1631
	        ut_print_timestamp(stderr);
	        fprintf(stderr, "  InnoDB error:\n"
"Cannot find table %s from the internal data dictionary\n"
"of InnoDB though the .frm file for the table exists. Maybe you\n"
"have deleted and recreated InnoDB data files but have forgotten\n"
"to delete the corresponding .frm files of InnoDB tables, or you\n"
"have moved .frm files to another database?\n"
"Look from section 15.1 of http://www.innodb.com/ibman.html\n"
"how you can resolve the problem.\n",
1632
			  norm_name);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1633 1634 1635
	        free_share(share);
    		my_free((char*) upd_buff, MYF(0));
    		my_errno = ENOENT;
1636

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1637 1638 1639
    		DBUG_RETURN(1);
  	}

1640
 	if (ib_table->ibd_file_missing && !thd->tablespace_op) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1641 1642 1643 1644 1645 1646 1647 1648 1649
	        ut_print_timestamp(stderr);
	        fprintf(stderr, "  InnoDB error:\n"
"MySQL is trying to open a table handle but the .ibd file for\n"
"table %s does not exist.\n"
"Have you deleted the .ibd file from the database directory under\n"
"the MySQL datadir, or have you used DISCARD TABLESPACE?\n"
"Look from section 15.1 of http://www.innodb.com/ibman.html\n"
"how you can resolve the problem.\n",
			  norm_name);
1650
	        free_share(share);
1651
    		my_free((char*) upd_buff, MYF(0));
1652
    		my_errno = ENOENT;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1653

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1654 1655
		dict_table_decrement_handle_count(ib_table);

1656 1657 1658
    		DBUG_RETURN(1);
  	}

1659
	innobase_prebuilt = row_create_prebuilt(ib_table);
1660

1661
	((row_prebuilt_t*)innobase_prebuilt)->mysql_row_len = table->reclength;
1662

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1663 1664 1665 1666
	/* Looks like MySQL-3.23 sometimes has primary key number != 0 */

 	primary_key = table->primary_key;
	key_used_on_scan = primary_key;
1667

1668 1669 1670 1671 1672
	/* Allocate a buffer for a 'row reference'. A row reference is
	a string of bytes of length ref_length which uniquely specifies
        a row in our table. Note that MySQL may also compare two row
        references for equality by doing a simple memcmp on the strings
        of length ref_length! */
1673

1674
  	if (!row_table_got_default_clust_index(ib_table)) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1675 1676 1677 1678 1679
	        if (primary_key >= MAX_KEY) {
	                fprintf(stderr,
		    "InnoDB: Error: table %s has a primary key in InnoDB\n"
		    "InnoDB: data dictionary, but not in MySQL!\n", name);
		}
1680 1681 1682

		((row_prebuilt_t*)innobase_prebuilt)
				->clust_index_was_generated = FALSE;
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
1683
 		/*
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
1684 1685 1686 1687 1688
		  MySQL allocates the buffer for ref. key_info->key_length
		  includes space for all key columns + one byte for each column
		  that may be NULL. ref_length must be as exact as possible to
		  save space, because all row reference buffers are allocated
		  based on ref_length.
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
1689
		*/
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
1690
 
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1691
  		ref_length = table->key_info[primary_key].key_length;
1692
	} else {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1693 1694 1695
	        if (primary_key != MAX_KEY) {
	                fprintf(stderr,
		    "InnoDB: Error: table %s has no primary key in InnoDB\n"
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1696 1697 1698 1699 1700 1701 1702 1703
		    "InnoDB: data dictionary, but has one in MySQL!\n"
		    "InnoDB: If you created the table with a MySQL\n"
                    "InnoDB: version < 3.23.54 and did not define a primary\n"
                    "InnoDB: key, but defined a unique key with all non-NULL\n"
                    "InnoDB: columns, then MySQL internally treats that key\n"
                    "InnoDB: as the primary key. You can fix this error by\n"
		    "InnoDB: dump + DROP + CREATE + reimport of the table.\n",
				name);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1704 1705
		}

1706 1707 1708
		((row_prebuilt_t*)innobase_prebuilt)
				->clust_index_was_generated = TRUE;

monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
1709
  		ref_length = DATA_ROW_ID_LEN;
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
1710

monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
1711 1712 1713 1714 1715 1716 1717 1718 1719
		/*
		  If we automatically created the clustered index, then
		  MySQL does not know about it, and MySQL must NOT be aware
		  of the index used on scan, to make it avoid checking if we
		  update the column of the index. That is why we assert below
		  that key_used_on_scan is the undefined value MAX_KEY.
		  The column is the row id in the automatical generation case,
		  and it will never be updated anyway.
		*/
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1720 1721 1722 1723 1724
	       
		if (key_used_on_scan != MAX_KEY) {
	                fprintf(stderr,
"InnoDB: Warning: table %s key_used_on_scan is %lu even though there is no\n"
"InnoDB: primary key inside InnoDB.\n",
1725
				name, (ulong)key_used_on_scan);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1726
		}
1727
	}
1728

monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
1729 1730
	auto_inc_counter_for_this_stat = 0;

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1731 1732 1733
	block_size = 16 * 1024;	/* Index block size in InnoDB: used by MySQL
				in query optimization */

monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
1734
	/* Init table lock structure */
1735
	thr_lock_data_init(&share->lock,&lock,(void*) 0);
1736 1737

  	info(HA_STATUS_NO_LOCK | HA_STATUS_VARIABLE | HA_STATUS_CONST);
1738

1739 1740 1741 1742
  	DBUG_RETURN(0);
}

/**********************************************************************
1743
Closes a handle to an InnoDB table. */
1744 1745 1746 1747 1748 1749 1750 1751 1752 1753

int
ha_innobase::close(void)
/*====================*/
				/* out: error number */
{
  	DBUG_ENTER("ha_innobase::close");

	row_prebuilt_free((row_prebuilt_t*) innobase_prebuilt);

1754
    	my_free((char*) upd_buff, MYF(0));
1755 1756
        free_share(share);

1757
	/* Tell InnoDB server that there might be work for
1758 1759 1760 1761 1762 1763 1764
	utility threads: */

	srv_active_wake_master_thread();

  	DBUG_RETURN(0);
}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840
/* The following accessor functions should really be inside MySQL code! */

/******************************************************************
Gets the offset of a field in a table: the distance in bytes from the
start of table->record[0] to the data pointer of the field. */
inline
uint
get_field_offset(
/*=============*/
			/* out: offset */
	TABLE*	table,	/* in: MySQL table object */
	Field*	field)	/* in: MySQL field object */
{
	char*	row_start = (char*) table->record[0];

	return((uint) (field->ptr - row_start));
}

/******************************************************************
Tests whether a field is SQL NULL in a given record. The position of
the null bit is derived from the record format information in table:
the null byte lies at the same offset in record as field->null_ptr lies
in table->record[0]. */
inline
uint
field_in_record_is_null(
/*====================*/
			/* out: 1 if NULL, 0 otherwise */
	TABLE*	table,	/* in: MySQL table object */
	Field*	field,	/* in: MySQL field object */
	char*	record)	/* in: a row in MySQL format */
{
	int	byte_pos;

	if (field->null_ptr) {
		byte_pos = (uint) ((char*) field->null_ptr
					- (char*) table->record[0]);

		return((record[byte_pos] & field->null_bit) ? 1 : 0);
	}

	/* The field has no null bit: it is reported as not NULL */

	return(0);
}

/******************************************************************
Marks a field as SQL NULL in a given record by switching on its null
bit. The position of the null bit is derived from the record format
information in table. Note that field->null_ptr is used here without
checking that it is non-NULL. */
inline
void
set_field_in_record_to_null(
/*========================*/
	TABLE*	table,	/* in: MySQL table object */
	Field*	field,	/* in: MySQL field object */
	char*	record)	/* in: a row in MySQL format */
{
	int	byte_pos = (uint) ((char*) field->null_ptr
					- (char*) table->record[0]);

	record[byte_pos] |= field->null_bit;
}

/******************************************************************
Resets SQL NULL bits in a record to zero. This is done by zero-filling
the first table->null_bytes bytes of the record. */
inline
void
reset_null_bits(
/*============*/
	TABLE*	table,	/* in: MySQL table object; its null_bytes member
			gives the number of bytes to clear */
	char*	record)	/* in/out: a row in MySQL format */
{
	bzero(record, table->null_bytes);
}

1841 1842
extern "C" {
/*****************************************************************
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1843 1844 1845 1846
InnoDB uses this function to compare two data fields for which the data type
is such that we must use MySQL code to compare them. NOTE that the prototype
of this function is in rem0cmp.c in InnoDB source code! If you change this
function, remember to update the prototype there! */
1847 1848 1849

int
innobase_mysql_cmp(
1850
/*===============*/
1851 1852
					/* out: 1, 0, -1, if a is greater,
					equal, less than b, respectively */
1853
	int		mysql_type,	/* in: MySQL type */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1854
	uint		charset_number,	/* in: number of the charset */
1855 1856 1857 1858 1859 1860 1861
	unsigned char*	a,		/* in: data field */
	unsigned int	a_length,	/* in: data field length,
					not UNIV_SQL_NULL */
	unsigned char*	b,		/* in: data field */
	unsigned int	b_length)	/* in: data field length,
					not UNIV_SQL_NULL */
{
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1862
	CHARSET_INFO*		charset;
1863
	enum_field_types	mysql_tp;
1864
	int                     ret;
1865

monty@bitch.mysql.fi's avatar
monty@bitch.mysql.fi committed
1866 1867
	DBUG_ASSERT(a_length != UNIV_SQL_NULL);
	DBUG_ASSERT(b_length != UNIV_SQL_NULL);
1868 1869 1870 1871 1872 1873 1874

	mysql_tp = (enum_field_types) mysql_type;

	switch (mysql_tp) {

	case FIELD_TYPE_STRING:
	case FIELD_TYPE_VAR_STRING:
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1875 1876 1877 1878
	case FIELD_TYPE_TINY_BLOB:
	case FIELD_TYPE_MEDIUM_BLOB:
	case FIELD_TYPE_BLOB:
	case FIELD_TYPE_LONG_BLOB:
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898
		/* Use the charset number to pick the right charset struct for
		the comparison. Since the MySQL function get_charset may be
		slow before Bar removes the mutex operation there, we first
		look at 2 common charsets directly. */

		if (charset_number == default_charset_info->number) {
			charset = default_charset_info;
		} else if (charset_number == my_charset_latin1.number) {
			charset = &my_charset_latin1;
		} else {
			charset = get_charset(charset_number, MYF(MY_WME));

			if (charset == NULL) {
				fprintf(stderr,
"InnoDB: fatal error: InnoDB needs charset %lu for doing a comparison,\n"
"InnoDB: but MySQL cannot find that charset.\n", (ulong)charset_number);
				ut_a(0);
			}
		}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1899 1900 1901 1902 1903 1904
                /* Starting from 4.1.3, we use strnncollsp() in comparisons of
                non-latin1_swedish_ci strings. NOTE that the collation order
                changes then: 'b\0\0...' is ordered BEFORE 'b  ...'. Users
                having indexes on such data need to rebuild their tables! */

                ret = charset->coll->strnncollsp(charset,
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1905 1906
                                  a, a_length,
                                  b, b_length);
1907
		if (ret < 0) {
1908
		        return(-1);
1909
		} else if (ret > 0) {
1910
		        return(1);
1911
		} else {
1912
		        return(0);
1913
	        }
1914 1915 1916 1917 1918 1919 1920 1921 1922
	default:
		assert(0);
	}

	return(0);
}
}

/******************************************************************
1923
Converts a MySQL type to an InnoDB type. */
1924 1925
inline
ulint
1926 1927
get_innobase_type_from_mysql_type(
/*==============================*/
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1928 1929 1930 1931 1932
				/* out: DATA_BINARY, DATA_VARCHAR, ... */
	ulint*	unsigned_flag,	/* out: DATA_UNSIGNED if an 'unsigned type';
				at least ENUM and SET, and unsigned integer
				types are 'unsigned types' */
	Field*	field)		/* in: MySQL field */
1933
{
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1934 1935 1936
	/* The following asserts try to check that the MySQL type code fits in
	8 bits: this is used in ibuf and also when DATA_NOT_NULL is ORed to
	the type */
1937

monty@bitch.mysql.fi's avatar
monty@bitch.mysql.fi committed
1938 1939 1940 1941 1942
	DBUG_ASSERT((ulint)FIELD_TYPE_STRING < 256);
	DBUG_ASSERT((ulint)FIELD_TYPE_VAR_STRING < 256);
	DBUG_ASSERT((ulint)FIELD_TYPE_DOUBLE < 256);
	DBUG_ASSERT((ulint)FIELD_TYPE_FLOAT < 256);
	DBUG_ASSERT((ulint)FIELD_TYPE_DECIMAL < 256);
1943

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964
	if (field->flags & UNSIGNED_FLAG) {

		*unsigned_flag = DATA_UNSIGNED;
	} else {
		*unsigned_flag = 0;
	}

	if (field->real_type() == FIELD_TYPE_ENUM
	    || field->real_type() == FIELD_TYPE_SET) {

		/* MySQL has field->type() a string type for these, but the
		data is actually internally stored as an unsigned integer
		code! */

		*unsigned_flag = DATA_UNSIGNED; /* MySQL has its own unsigned
						flag set to zero, even though
						internally this is an unsigned
						integer type */
		return(DATA_INT);
	}

1965
	switch (field->type()) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1966 1967
	        /* NOTE that we only allow string types in DATA_MYSQL
		and DATA_VARMYSQL */
1968
		case FIELD_TYPE_VAR_STRING: if (field->binary()) {
1969 1970 1971

						return(DATA_BINARY);
					} else if (strcmp(
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1972 1973
						  field->charset()->name,
						 "latin1_swedish_ci") == 0) {
1974
						return(DATA_VARCHAR);
1975 1976
					} else {
						return(DATA_VARMYSQL);
1977
					}
1978
		case FIELD_TYPE_STRING: if (field->binary()) {
1979 1980 1981

						return(DATA_FIXBINARY);
					} else if (strcmp(
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1982 1983
						   field->charset()->name,
						   "latin1_swedish_ci") == 0) {
1984
						return(DATA_CHAR);
1985 1986
					} else {
						return(DATA_MYSQL);
1987
					}
1988 1989 1990 1991 1992 1993 1994 1995 1996
		case FIELD_TYPE_LONG:
		case FIELD_TYPE_LONGLONG:
		case FIELD_TYPE_TINY:
		case FIELD_TYPE_SHORT:
		case FIELD_TYPE_INT24:
		case FIELD_TYPE_DATE:
		case FIELD_TYPE_DATETIME:
		case FIELD_TYPE_YEAR:
		case FIELD_TYPE_NEWDATE:
1997 1998 1999
		case FIELD_TYPE_TIME:
		case FIELD_TYPE_TIMESTAMP:
					return(DATA_INT);
2000
		case FIELD_TYPE_FLOAT:
2001
					return(DATA_FLOAT);
2002
		case FIELD_TYPE_DOUBLE:
2003
					return(DATA_DOUBLE);
2004
		case FIELD_TYPE_DECIMAL:
2005 2006 2007 2008 2009 2010
					return(DATA_DECIMAL);
		case FIELD_TYPE_TINY_BLOB:
		case FIELD_TYPE_MEDIUM_BLOB:
		case FIELD_TYPE_BLOB:
		case FIELD_TYPE_LONG_BLOB:
					return(DATA_BLOB);
2011 2012 2013 2014 2015 2016
		default:
					assert(0);
	}

	return(0);
}
2017

2018
/***********************************************************************
2019
Stores a key value for a row to a buffer. */
2020 2021 2022 2023 2024 2025 2026

uint
ha_innobase::store_key_val_for_row(
/*===============================*/
				/* out: key value length as stored in buff */
	uint 		keynr,	/* in: key number */
	char*		buff,	/* in/out: buffer for the key value (in MySQL
2027 2028
				format) */
	uint		buff_len,/* in: buffer length */
2029
	const mysql_byte* record)/* in: row in MySQL format */
2030 2031 2032 2033 2034
{
	KEY*		key_info 	= table->key_info + keynr;
  	KEY_PART_INFO*	key_part	= key_info->key_part;
  	KEY_PART_INFO*	end		= key_part + key_info->key_parts;
	char*		buff_start	= buff;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2035 2036 2037 2038 2039
	enum_field_types mysql_type;
	Field*		field;
	ulint		blob_len;
	byte*		blob_data;
	ibool		is_null;
2040

2041 2042
  	DBUG_ENTER("store_key_val_for_row");

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058
	/* The format for storing a key field in MySQL is the following:

	1. If the column can be NULL, then in the first byte we put 1 if the
	field value is NULL, 0 otherwise.

	2. If the column is of a BLOB type (it must be a column prefix field
	in this case), then we put the length of the data in the field to the
	next 2 bytes, in the little-endian format. If the field is SQL NULL,
	then these 2 bytes are set to 0. Note that the length of data in the
	field is <= column prefix length.

	3. In a column prefix field, prefix_len next bytes are reserved for
	data. In a normal field the max field length next bytes are reserved
	for data. For a VARCHAR(n) the max field length is n. If the stored
	value is the SQL NULL then these data bytes are set to 0. */	

2059 2060 2061
	/* We have to zero-fill the buffer so that MySQL is able to use a
	simple memcmp to compare two key values to determine if they are
	equal. MySQL does this to compare contents of two 'ref' values. */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2062

2063
	bzero(buff, buff_len);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2064

2065
  	for (; key_part != end; key_part++) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2066
	        is_null = FALSE;
2067 2068 2069 2070

    		if (key_part->null_bit) {
      			if (record[key_part->null_offset]
						& key_part->null_bit) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2071 2072 2073 2074 2075 2076
				*buff = 1;
				is_null = TRUE;
      			} else {
				*buff = 0;
			}
			buff++;
2077
    		}
2078

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2079 2080 2081 2082 2083 2084 2085
		field = key_part->field;
		mysql_type = field->type();

		if (mysql_type == FIELD_TYPE_TINY_BLOB
		    || mysql_type == FIELD_TYPE_MEDIUM_BLOB
		    || mysql_type == FIELD_TYPE_BLOB
		    || mysql_type == FIELD_TYPE_LONG_BLOB) {
2086

2087
			ut_a(key_part->key_part_flag & HA_PART_KEY_SEG);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2088 2089 2090 2091 2092 2093 2094 2095 2096

		        if (is_null) {
				 buff += key_part->length + 2;
				 
				 continue;
			}
		    
		        blob_data = row_mysql_read_blob_ref(&blob_len,
				(byte*) (record
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2097
				+ (ulint)get_field_offset(table, field)),
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2098 2099
					(ulint) field->pack_length());

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2100 2101
			ut_a(get_field_offset(table, field)
						     == key_part->offset);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2102 2103 2104 2105 2106 2107 2108 2109
			if (blob_len > key_part->length) {
			        blob_len = key_part->length;
			}

			/* MySQL reserves 2 bytes for the length and the
			storage of the number is little-endian */

			ut_a(blob_len < 256);
2110
			*((byte*)buff) = (byte)blob_len;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125
			buff += 2;

			memcpy(buff, blob_data, blob_len);

			buff += key_part->length;
		} else {
		        if (is_null) {
				 buff += key_part->length;
				 
				 continue;
			}
			memcpy(buff, record + key_part->offset,
							key_part->length);
			buff += key_part->length;
		}
2126 2127
  	}

2128
	ut_a(buff <= buff_start + buff_len);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2129 2130

	DBUG_RETURN((uint)(buff - buff_start));
2131 2132 2133
}

/******************************************************************
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2134 2135
Builds a 'template' to the prebuilt struct. The template is used in fast
retrieval of just those column values MySQL needs in its processing. */
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
2136
static
2137
void
2138 2139 2140 2141 2142 2143 2144 2145 2146
build_template(
/*===========*/
	row_prebuilt_t*	prebuilt,	/* in: prebuilt struct */
	THD*		thd,		/* in: current user thread, used
					only if templ_type is
					ROW_MYSQL_REC_FIELDS */
	TABLE*		table,		/* in: MySQL table */
	ulint		templ_type)	/* in: ROW_MYSQL_WHOLE_ROW or
					ROW_MYSQL_REC_FIELDS */
2147
{
2148 2149
	dict_index_t*	index;
	dict_index_t*	clust_index;
2150
	mysql_row_templ_t* templ;
2151
	Field*		field;
2152 2153
	ulint		n_fields;
	ulint		n_requested_fields	= 0;
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
2154
	ibool		fetch_all_in_key	= FALSE;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2155
	ibool		fetch_primary_key_cols	= FALSE;
2156
	ulint		i;
2157

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2158 2159 2160 2161
	if (prebuilt->select_lock_type == LOCK_X) {
		/* We always retrieve the whole clustered index record if we
		use exclusive row level locks, for example, if the read is
		done in an UPDATE statement. */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2162

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2163 2164 2165
	        templ_type = ROW_MYSQL_WHOLE_ROW;
	}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2166 2167 2168
	if (templ_type == ROW_MYSQL_REC_FIELDS) {
	     if (prebuilt->hint_need_to_fetch_extra_cols
						== ROW_RETRIEVE_ALL_COLS) {
2169

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2170 2171
		/* We know we must at least fetch all columns in the key, or
		all columns in the table */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2172

monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
2173
		if (prebuilt->read_just_key) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2174
			/* MySQL has instructed us that it is enough to
2175 2176 2177 2178 2179
			fetch the columns in the key; looks like MySQL
			can set this flag also when there is only a
			prefix of the column in the key: in that case we
			retrieve the whole column from the clustered
			index */
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
2180

monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
2181 2182 2183 2184
			fetch_all_in_key = TRUE;
		} else {
			templ_type = ROW_MYSQL_WHOLE_ROW;
		}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2185 2186
	    } else if (prebuilt->hint_need_to_fetch_extra_cols
						== ROW_RETRIEVE_PRIMARY_KEY) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2187 2188 2189 2190 2191
		/* We must at least fetch all primary key cols. Note that if
		the clustered index was internally generated by InnoDB on the
		row id (no primary key was defined), then
		row_search_for_mysql() will always retrieve the row id to a
		special buffer in the prebuilt struct. */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2192 2193 2194

		fetch_primary_key_cols = TRUE;
	    }
2195 2196
	}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2197
	clust_index = dict_table_get_first_index_noninline(prebuilt->table);
2198

2199
	if (templ_type == ROW_MYSQL_REC_FIELDS) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2200
		index = prebuilt->index;
2201 2202
	} else {
		index = clust_index;
2203
	}
2204

2205 2206 2207 2208 2209 2210 2211
	if (index == clust_index) {
		prebuilt->need_to_access_clustered = TRUE;
	} else {
		prebuilt->need_to_access_clustered = FALSE;
		/* Below we check column by column if we need to access
		the clustered index */
	}
2212

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2213
	n_fields = (ulint)table->fields; /* number of columns */
2214 2215 2216 2217 2218 2219

	if (!prebuilt->mysql_template) {
		prebuilt->mysql_template = (mysql_row_templ_t*)
						mem_alloc_noninline(
					n_fields * sizeof(mysql_row_templ_t));
	}
2220

2221 2222
	prebuilt->template_type = templ_type;
	prebuilt->null_bitmap_len = table->null_bytes;
2223

2224 2225
	prebuilt->templ_contains_blob = FALSE;

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2226 2227
	/* Note that in InnoDB, i is the column number. MySQL calls columns
	'fields'. */
2228
	for (i = 0; i < n_fields; i++) {
2229
		templ = prebuilt->mysql_template + n_requested_fields;
2230 2231
		field = table->field[i];

2232
		if (templ_type == ROW_MYSQL_REC_FIELDS
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2233 2234
		    && !(fetch_all_in_key
			 && dict_index_contains_col_or_prefix(index, i))
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2235 2236
		    && !(fetch_primary_key_cols
			 && dict_table_col_in_clustered_key(index->table, i))
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2237 2238 2239
		    && thd->query_id != field->query_id) {

			/* This field is not needed in the query, skip it */
2240 2241 2242 2243 2244

			goto skip_field;
		}

		n_requested_fields++;
2245

2246
		templ->col_no = i;
2247

2248 2249 2250
		if (index == clust_index) {
			templ->rec_field_no = (index->table->cols + i)
								->clust_pos;
2251
		} else {
2252 2253
			templ->rec_field_no = dict_index_get_nth_col_pos(
								index, i);
2254 2255
		}

2256 2257 2258 2259 2260 2261 2262 2263
		if (templ->rec_field_no == ULINT_UNDEFINED) {
			prebuilt->need_to_access_clustered = TRUE;
		}

		if (field->null_ptr) {
			templ->mysql_null_byte_offset =
				(ulint) ((char*) field->null_ptr
					- (char*) table->record[0]);
2264

2265 2266 2267 2268
			templ->mysql_null_bit_mask = (ulint) field->null_bit;
		} else {
			templ->mysql_null_bit_mask = 0;
		}
2269

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2270 2271 2272
		templ->mysql_col_offset = (ulint)
					get_field_offset(table, field);

2273
		templ->mysql_col_len = (ulint) field->pack_length();
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2274 2275 2276
		templ->type = index->table->cols[i].type.mtype;
		templ->is_unsigned = index->table->cols[i].type.prtype
							& DATA_UNSIGNED;
2277 2278
		templ->charset = dtype_get_charset_coll_noninline(
				index->table->cols[i].type.prtype);
2279

2280 2281
		if (templ->type == DATA_BLOB) {
			prebuilt->templ_contains_blob = TRUE;
2282
		}
2283 2284 2285
skip_field:
		;
	}
2286

2287
	prebuilt->n_template = n_requested_fields;
2288

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2289
	if (index != clust_index && prebuilt->need_to_access_clustered) {
2290 2291 2292 2293
		/* Change rec_field_no's to correspond to the clustered index
		record */
		for (i = 0; i < n_requested_fields; i++) {
			templ = prebuilt->mysql_template + i;
2294

2295 2296 2297
			templ->rec_field_no =
			    (index->table->cols + templ->col_no)->clust_pos;
		}
2298
	}
2299 2300 2301
}

/************************************************************************
2302
Stores a row in an InnoDB database, to the table specified in this
2303 2304 2305 2306 2307
handle. */

int
ha_innobase::write_row(
/*===================*/
2308 2309
				/* out: error code */
	mysql_byte* 	record)	/* in: a row in MySQL format */
2310
{
2311
	row_prebuilt_t* prebuilt = (row_prebuilt_t*)innobase_prebuilt;
2312
  	int 		error;
2313
	longlong	auto_inc;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2314
	longlong	dummy;
2315 2316
	ibool           incremented_auto_inc_for_stat = FALSE;
	ibool           incremented_auto_inc_counter = FALSE;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2317
	ibool           skip_auto_inc_decr;
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
2318

2319
  	DBUG_ENTER("ha_innobase::write_row");
2320

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2321 2322 2323 2324
	if (prebuilt->trx !=
			(trx_t*) current_thd->transaction.all.innobase_tid) {
		fprintf(stderr,
"InnoDB: Error: the transaction object for the table handle is at\n"
2325 2326 2327 2328 2329 2330 2331 2332 2333
"InnoDB: %p, but for the current thread it is at %p\n",
			prebuilt->trx,
			current_thd->transaction.all.innobase_tid);
		fputs("InnoDB: Dump of 200 bytes around prebuilt: ", stderr);
		ut_print_buf(stderr, ((const byte*)prebuilt) - 100, 200);
		fputs("\n"
			"InnoDB: Dump of 200 bytes around transaction.all: ",
			stderr);
		ut_print_buf(stderr,
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2334
			((byte*)(&(current_thd->transaction.all))) - 100, 200);
2335 2336
		putc('\n', stderr);
		ut_error;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2337
	}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2338

2339 2340
  	statistic_increment(ha_write_count, &LOCK_status);

2341 2342
        if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_INSERT)
                table->timestamp_field->set_time();
2343

2344
	if ((user_thd->lex->sql_command == SQLCOM_ALTER_TABLE
marko@hundin.mysql.fi's avatar
marko@hundin.mysql.fi committed
2345
	    || user_thd->lex->sql_command == SQLCOM_OPTIMIZE
2346 2347
	    || user_thd->lex->sql_command == SQLCOM_CREATE_INDEX
	    || user_thd->lex->sql_command == SQLCOM_DROP_INDEX)
marko@hundin.mysql.fi's avatar
marko@hundin.mysql.fi committed
2348
	    && num_write_row >= 10000) {
2349 2350 2351 2352 2353 2354 2355 2356
		/* ALTER TABLE is COMMITted at every 10000 copied rows.
		The IX table lock for the original table has to be re-issued.
		As this method will be called on a temporary table where the
		contents of the original table is being copied to, it is
		a bit tricky to determine the source table.  The cursor
		position in the source table need not be adjusted after the
		intermediate COMMIT, since writes by other transactions are
		being blocked by a MySQL table lock TL_WRITE_ALLOW_READ. */
2357

2358
		dict_table_t*	src_table;
2359 2360
		ibool		mode;

2361
		num_write_row = 0;
2362

marko@hundin.mysql.fi's avatar
marko@hundin.mysql.fi committed
2363 2364
		/* Commit the transaction.  This will release the table
		locks, so they have to be acquired again. */
2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384

		/* Altering an InnoDB table */
		/* Get the source table. */
		src_table = lock_get_src_table(
				prebuilt->trx, prebuilt->table, &mode);
		if (!src_table) {
		no_commit:
			/* Unknown situation: do not commit */
			/*
			ut_print_timestamp(stderr);
			fprintf(stderr,
				"  InnoDB error: ALTER TABLE is holding lock"
				" on %lu tables!\n",
				prebuilt->trx->mysql_n_tables_locked);
			*/
			;
		} else if (src_table == prebuilt->table) {
			/* Source table is not in InnoDB format:
			no need to re-acquire locks on it. */

2385 2386 2387 2388 2389 2390
			/* Altering to InnoDB format */
			innobase_commit(user_thd, prebuilt->trx);
			/* Note that this transaction is still active. */
			user_thd->transaction.all.innodb_active_trans = 1;
			/* We will need an IX lock on the destination table. */
		        prebuilt->sql_stat_start = TRUE;
2391 2392 2393 2394 2395
		} else {
			/* Ensure that there are no other table locks than
			LOCK_IX and LOCK_AUTO_INC on the destination table. */
			if (!lock_is_table_exclusive(prebuilt->table,
							prebuilt->trx)) {
2396 2397 2398 2399 2400 2401 2402 2403 2404
				goto no_commit;
			}

			/* Commit the transaction.  This will release the table
			locks, so they have to be acquired again. */
			innobase_commit(user_thd, prebuilt->trx);
			/* Note that this transaction is still active. */
			user_thd->transaction.all.innodb_active_trans = 1;
			/* Re-acquire the table lock on the source table. */
2405
			row_lock_table_for_mysql(prebuilt, src_table, mode);
2406 2407 2408
			/* We will need an IX lock on the destination table. */
		        prebuilt->sql_stat_start = TRUE;
		}
2409 2410
	}

marko@hundin.mysql.fi's avatar
marko@hundin.mysql.fi committed
2411 2412
	num_write_row++;

2413 2414 2415
	if (last_query_id != user_thd->query_id) {
	        prebuilt->sql_stat_start = TRUE;
                last_query_id = user_thd->query_id;
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
2416 2417

		innobase_release_stat_resources(prebuilt->trx);
2418 2419
	}

2420
  	if (table->next_number_field && record == table->record[0]) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2421 2422
		/* This is the case where the table has an
		auto-increment column */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447

		/* Initialize the auto-inc counter if it has not been
		initialized yet */

		if (0 == dict_table_autoinc_peek(prebuilt->table)) {

			/* This call initializes the counter */
		        error = innobase_read_and_init_auto_inc(&dummy);

			if (error) {
				/* Deadlock or lock wait timeout */

				goto func_exit;
			}

			/* We have to set sql_stat_start to TRUE because
			the above call probably has called a select, and
			has reset that flag; row_insert_for_mysql has to
			know to set the IX intention lock on the table,
			something it only does at the start of each
			statement */

			prebuilt->sql_stat_start = TRUE;
		}

2448 2449
	        /* Fetch the value the user possibly has set in the
	        autoincrement field */
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
2450

2451 2452
	        auto_inc = table->next_number_field->val_int();

2453 2454 2455
		/* In replication and also otherwise the auto-inc column 
		can be set with SET INSERT_ID. Then we must look at
		user_thd->next_insert_id. If it is nonzero and the user
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
2456 2457 2458
		has not supplied a value, we must use it, and use values
		incremented by 1 in all subsequent inserts within the
		same SQL statement! */
2459 2460

		if (auto_inc == 0 && user_thd->next_insert_id != 0) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2461 2462 2463

		        auto_inc_counter_for_this_stat
						= user_thd->next_insert_id;
2464
		}
2465

monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
2466 2467 2468 2469 2470 2471 2472 2473 2474 2475
		if (auto_inc == 0 && auto_inc_counter_for_this_stat) {
			/* The user set the auto-inc counter for
			this SQL statement with SET INSERT_ID. We must
			assign sequential values from the counter. */

			auto_inc = auto_inc_counter_for_this_stat;

			/* We give MySQL a new value to place in the
			auto-inc column */
			user_thd->next_insert_id = auto_inc;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2476 2477 2478

			auto_inc_counter_for_this_stat++;
			incremented_auto_inc_for_stat = TRUE;
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
2479
		}
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
2480

2481
		if (auto_inc != 0) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2482 2483 2484
			/* This call will calculate the max of the current
			value and the value supplied by the user and
			update the counter accordingly */
2485 2486 2487 2488 2489 2490 2491 2492

			/* We have to use the transactional lock mechanism
			on the auto-inc counter of the table to ensure
			that replication and roll-forward of the binlog
			exactly imitates also the given auto-inc values.
			The lock is released at each SQL statement's
			end. */

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2493
			innodb_srv_conc_enter_innodb(prebuilt->trx);
2494
			error = row_lock_table_autoinc_for_mysql(prebuilt);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2495
			innodb_srv_conc_exit_innodb(prebuilt->trx);
2496 2497

			if (error != DB_SUCCESS) {
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
2498

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2499
				error = convert_error_code_to_mysql(error,
monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
2500
								    user_thd);
2501 2502
				goto func_exit;
			}	
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
2503

2504 2505
			dict_table_autoinc_update(prebuilt->table, auto_inc);
		} else {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2506
			innodb_srv_conc_enter_innodb(prebuilt->trx);
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
2507

2508 2509 2510 2511 2512
			if (!prebuilt->trx->auto_inc_lock) {

				error = row_lock_table_autoinc_for_mysql(
								prebuilt);
				if (error != DB_SUCCESS) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2513 2514
 					innodb_srv_conc_exit_innodb(
							prebuilt->trx);
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
2515

2516
					error = convert_error_code_to_mysql(
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2517
							error, user_thd);
2518 2519 2520 2521
					goto func_exit;
				}
			}	

2522 2523 2524
			/* The following call gets the value of the auto-inc
			counter of the table and increments it by 1 */

2525
			auto_inc = dict_table_autoinc_get(prebuilt->table);
2526 2527
			incremented_auto_inc_counter = TRUE;

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2528
			innodb_srv_conc_exit_innodb(prebuilt->trx);
2529

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2530 2531
			/* We can give the new value for MySQL to place in
			the field */
2532

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2533
			user_thd->next_insert_id = auto_inc;
2534
		}
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
2535

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2536 2537 2538
		/* This call of a handler.cc function places
		user_thd->next_insert_id to the column value, if the column
		value was not set by the user */
2539

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2540 2541
    		update_auto_increment();
	}
2542

2543 2544 2545 2546
	if (prebuilt->mysql_template == NULL
			|| prebuilt->template_type != ROW_MYSQL_WHOLE_ROW) {
		/* Build the template used in converting quickly between
		the two database formats */
2547

2548 2549
		build_template(prebuilt, NULL, table, ROW_MYSQL_WHOLE_ROW);
	}
2550

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2551
	innodb_srv_conc_enter_innodb(prebuilt->trx);
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
2552

2553
	error = row_insert_for_mysql((byte*) record, prebuilt);
2554

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2555
	innodb_srv_conc_exit_innodb(prebuilt->trx);
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
2556

2557 2558 2559
	if (error != DB_SUCCESS) {
	        /* If the insert did not succeed we restore the value of
		the auto-inc counter we used; note that this behavior was
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2560
		introduced only in version 4.0.4.
2561 2562
		NOTE that a REPLACE command and LOAD DATA INFILE REPLACE
		handles a duplicate key error
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2563
		itself, and we must not decrement the autoinc counter
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2564
		if we are performing those statements.
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2565 2566 2567 2568 2569
		NOTE 2: if there was an error, for example a deadlock,
		which caused InnoDB to roll back the whole transaction
		already in the call of row_insert_for_mysql(), we may no
		longer have the AUTO-INC lock, and cannot decrement
		the counter here. */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2570 2571

	        skip_auto_inc_decr = FALSE;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2572

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2573
	        if (error == DB_DUPLICATE_KEY
2574 2575
		    && (user_thd->lex->sql_command == SQLCOM_REPLACE
			|| user_thd->lex->sql_command
2576 2577 2578
			                 == SQLCOM_REPLACE_SELECT
		    	|| (user_thd->lex->sql_command == SQLCOM_LOAD
			    && user_thd->lex->duplicates == DUP_REPLACE))) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2579 2580 2581

		        skip_auto_inc_decr= TRUE;
		}
2582

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2583 2584
	        if (!skip_auto_inc_decr && incremented_auto_inc_counter
		    && prebuilt->trx->auto_inc_lock) {
2585
	                dict_table_autoinc_decrement(prebuilt->table);
2586 2587
	        }

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2588 2589
		if (!skip_auto_inc_decr && incremented_auto_inc_for_stat
		    && prebuilt->trx->auto_inc_lock) {
2590 2591 2592 2593
		        auto_inc_counter_for_this_stat--;
		}
	}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2594
	error = convert_error_code_to_mysql(error, user_thd);
2595

2596
	/* Tell InnoDB server that there might be work for
2597
	utility threads: */
2598
func_exit:
2599
	innobase_active_small();
2600 2601 2602 2603

  	DBUG_RETURN(error);
}

2604
/******************************************************************
2605
Converts field data for storage in an InnoDB update vector. */
2606 2607 2608 2609 2610 2611 2612 2613 2614 2615
inline
mysql_byte*
innobase_convert_and_store_changed_col(
/*===================================*/
				/* out: pointer to the end of the converted
				data in the buffer */
	upd_field_t*	ufield,	/* in/out: field in the update vector */
	mysql_byte*	buf,	/* in: buffer we can use in conversion */
	mysql_byte*	data,	/* in: column data to store */
	ulint		len,	/* in: data len */
2616
	ulint		col_type,/* in: data type in InnoDB type numbers */
2617
	ulint		prtype)	/* InnoDB precise data type and flags */
2618
{
2619 2620 2621 2622
	uint	i;

	if (len == UNIV_SQL_NULL) {
		data = NULL;
2623 2624
	} else if (col_type == DATA_VARCHAR || col_type == DATA_BINARY
		   || col_type == DATA_VARMYSQL) {
2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649
		/* Remove trailing spaces. */

		/* Handle UCS2 strings differently.  As no new
		collations will be introduced in 4.1, we hardcode the
		charset-collation codes here.  In 5.0, the logic will
		be based on mbminlen. */
		ulint	cset	= dtype_get_charset_coll_noninline(prtype);
		if (cset == 35/*ucs2_general_ci*/
				|| cset == 90/*ucs2_bin*/
				|| (cset >= 128/*ucs2_unicode_ci*/
				&& cset <= 144/*ucs2_persian_ci*/)) {
			/* space=0x0020 */
			/* Trim "half-chars", just in case. */
			len &= ~1;

			while (len && data[len - 2] == 0x00
					&& data[len - 1] == 0x20) {
				len -= 2;
			}
		} else {
			/* space=0x20 */
			while (len && data[len - 1] == 0x20) {
				len--;
			}
		}
2650
	} else if (col_type == DATA_INT) {
2651
		/* Store integer data in InnoDB in a big-endian
2652
		format, sign bit negated, if signed */
2653

2654 2655 2656 2657
		for (i = 0; i < len; i++) {
			buf[len - 1 - i] = data[i];
		}

2658
		if (!(prtype & DATA_UNSIGNED)) {
2659 2660 2661 2662 2663 2664
			buf[0] = buf[0] ^ 128;
		}

		data = buf;

		buf += len;
2665
	}
2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683

	ufield->new_val.data = data;
	ufield->new_val.len = len;

	return(buf);
}

/**************************************************************************
Checks which fields have changed in a row and stores information
of them to an update vector. */
static
int
calc_row_difference(
/*================*/
					/* out: error number or 0 */
	upd_t*		uvect,		/* in/out: update vector */
	mysql_byte* 	old_row,	/* in: old row in MySQL format */
	mysql_byte* 	new_row,	/* in: new row in MySQL format */
2684 2685
	struct st_table* table,		/* in: table in MySQL data
					dictionary */
2686
	mysql_byte*	upd_buff,	/* in: buffer to use */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2687
	ulint		buff_len,	/* in: buffer length */
2688
	row_prebuilt_t*	prebuilt,	/* in: InnoDB prebuilt struct */
2689 2690
	THD*		thd)		/* in: user thread */
{
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2691
	mysql_byte*	original_upd_buff = upd_buff;
2692
	Field*		field;
2693 2694 2695
	uint		n_fields;
	ulint		o_len;
	ulint		n_len;
2696 2697 2698
	byte*	        o_ptr;
        byte*	        n_ptr;
        byte*	        buf;
2699
	upd_field_t*	ufield;
2700
	ulint		col_type;
2701
	ulint		prtype;
2702
	ulint		n_changed = 0;
2703
	uint		i;
2704 2705 2706

	n_fields = table->fields;

2707
	/* We use upd_buff to convert changed fields */
2708
	buf = (byte*) upd_buff;
2709

2710 2711 2712
	for (i = 0; i < n_fields; i++) {
		field = table->field[i];

2713
		/* if (thd->query_id != field->query_id) { */
2714 2715
			/* TODO: check that these fields cannot have
			changed! */
2716

2717 2718
		/*	goto skip_field;
		}*/
2719

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2720 2721
		o_ptr = (byte*) old_row + get_field_offset(table, field);
		n_ptr = (byte*) new_row + get_field_offset(table, field);
2722 2723 2724
		o_len = field->pack_length();
		n_len = field->pack_length();

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2725
		col_type = prebuilt->table->cols[i].type.mtype;
2726
		prtype = prebuilt->table->cols[i].type.prtype;
2727 2728 2729 2730 2731 2732 2733 2734 2735
		switch (col_type) {

		case DATA_BLOB:
			o_ptr = row_mysql_read_blob_ref(&o_len, o_ptr, o_len);
			n_ptr = row_mysql_read_blob_ref(&n_len, n_ptr, n_len);
			break;
		case DATA_VARCHAR:
		case DATA_BINARY:
		case DATA_VARMYSQL:
2736 2737 2738 2739
			o_ptr = row_mysql_read_var_ref_noninline(&o_len,
								o_ptr);
			n_ptr = row_mysql_read_var_ref_noninline(&n_len,
								n_ptr);
2740 2741 2742
		default:
			;
		}
2743

2744
		if (field->null_ptr) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2745 2746
			if (field_in_record_is_null(table, field,
							(char*) old_row)) {
2747 2748
				o_len = UNIV_SQL_NULL;
			}
2749

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2750 2751
			if (field_in_record_is_null(table, field,
							(char*) new_row)) {
2752 2753 2754 2755 2756 2757 2758 2759 2760 2761
				n_len = UNIV_SQL_NULL;
			}
		}

		if (o_len != n_len || (o_len != UNIV_SQL_NULL &&
					0 != memcmp(o_ptr, n_ptr, o_len))) {
			/* The field has changed */

			ufield = uvect->fields + n_changed;

2762 2763 2764 2765
			buf = (byte*)
                          innobase_convert_and_store_changed_col(ufield,
					  (mysql_byte*)buf,
					  (mysql_byte*)n_ptr, n_len, col_type,
2766
						prtype);
2767
			ufield->exp = NULL;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2768
			ufield->field_no = prebuilt->table->cols[i].clust_pos;
2769 2770 2771 2772 2773 2774 2775
			n_changed++;
		}
	}

	uvect->n_fields = n_changed;
	uvect->info_bits = 0;

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2776 2777
	ut_a(buf <= (byte*)original_upd_buff + buff_len);

2778 2779 2780 2781 2782 2783 2784
	return(0);
}

/**************************************************************************
Updates a row given as a parameter to a new value. Note that we are given
whole rows, not just the fields which are updated: this incurs some
overhead for CPU when we check which fields are actually updated.
2785
TODO: currently InnoDB does not prevent the 'Halloween problem':
2786 2787
in a searched update a single row can get updated several times
if its index columns are updated! */
2788

2789 2790 2791 2792
int
ha_innobase::update_row(
/*====================*/
					/* out: error number or 0 */
2793 2794
	const mysql_byte* 	old_row,/* in: old row in MySQL format */
	mysql_byte* 		new_row)/* in: new row in MySQL format */
2795 2796 2797 2798 2799
{
	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;
	upd_t*		uvect;
	int		error = 0;

2800
	DBUG_ENTER("ha_innobase::update_row");
2801

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2802 2803
	ut_ad(prebuilt->trx ==
		(trx_t*) current_thd->transaction.all.innobase_tid);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2804

2805 2806
        if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_UPDATE)
                table->timestamp_field->set_time();
2807

2808 2809 2810
	if (last_query_id != user_thd->query_id) {
	        prebuilt->sql_stat_start = TRUE;
                last_query_id = user_thd->query_id;
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
2811 2812

		innobase_release_stat_resources(prebuilt->trx);
2813 2814
	}

2815 2816 2817 2818 2819
	if (prebuilt->upd_node) {
		uvect = prebuilt->upd_node->update;
	} else {
		uvect = row_get_prebuilt_update_vector(prebuilt);
	}
2820 2821 2822 2823

	/* Build an update vector from the modified fields in the rows
	(uses upd_buff of the handle) */

2824
	calc_row_difference(uvect, (mysql_byte*) old_row, new_row, table,
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2825 2826 2827
			upd_buff, (ulint)upd_and_key_val_buff_len,
			prebuilt, user_thd);

2828 2829 2830
	/* This is not a delete */
	prebuilt->upd_node->is_delete = FALSE;

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2831
	assert(prebuilt->template_type == ROW_MYSQL_WHOLE_ROW);
2832

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2833
	innodb_srv_conc_enter_innodb(prebuilt->trx);
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
2834

2835
	error = row_update_for_mysql((byte*) old_row, prebuilt);
2836

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2837
	innodb_srv_conc_exit_innodb(prebuilt->trx);
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
2838

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2839
	error = convert_error_code_to_mysql(error, user_thd);
2840

2841
	/* Tell InnoDB server that there might be work for
2842 2843
	utility threads: */

2844
	innobase_active_small();
2845 2846 2847 2848 2849 2850 2851 2852 2853 2854

	DBUG_RETURN(error);
}

/**************************************************************************
Deletes a row given as the parameter. */

int
ha_innobase::delete_row(
/*====================*/
2855 2856
					/* out: error number or 0 */
	const mysql_byte* record)	/* in: a row in MySQL format */
2857 2858 2859 2860
{
	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;
	int		error = 0;

2861
	DBUG_ENTER("ha_innobase::delete_row");
2862

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2863 2864
	ut_ad(prebuilt->trx ==
		(trx_t*) current_thd->transaction.all.innobase_tid);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2865

2866 2867 2868
	if (last_query_id != user_thd->query_id) {
	        prebuilt->sql_stat_start = TRUE;
                last_query_id = user_thd->query_id;
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
2869 2870

		innobase_release_stat_resources(prebuilt->trx);
2871 2872
	}

2873 2874 2875
	if (!prebuilt->upd_node) {
		row_get_prebuilt_update_vector(prebuilt);
	}
2876 2877

	/* This is a delete */
2878

2879
	prebuilt->upd_node->is_delete = TRUE;
2880

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2881
	innodb_srv_conc_enter_innodb(prebuilt->trx);
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
2882

2883
	error = row_update_for_mysql((byte*) record, prebuilt);
2884

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2885
	innodb_srv_conc_exit_innodb(prebuilt->trx);
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
2886

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2887
	error = convert_error_code_to_mysql(error, user_thd);
2888

2889
	/* Tell the InnoDB server that there might be work for
2890 2891
	utility threads: */

2892
	innobase_active_small();
2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908

	DBUG_RETURN(error);
}

/**********************************************************************
Initializes a handle to use an index. All the work is done by
change_active_index(), whose result is returned as is. */

int
ha_innobase::index_init(
/*====================*/
			/* out: 0 or error number */
	uint 	keynr)	/* in: key (index) number */
{
	int	result;

	DBUG_ENTER("index_init");

	result = change_active_index(keynr);

	DBUG_RETURN(result);
}

/**********************************************************************
2915
Currently does nothing. */
2916 2917 2918 2919 2920 2921 2922

int
ha_innobase::index_end(void)
/*========================*/
{
	int 	error	= 0;
  	DBUG_ENTER("index_end");
2923
        active_index=MAX_KEY;
2924 2925 2926 2927 2928
  	DBUG_RETURN(error);
}

/*************************************************************************
Converts a search mode flag understood by MySQL to a flag understood
2929
by InnoDB. */
2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943
inline
ulint
convert_search_mode_to_innobase(
/*============================*/
	enum ha_rkey_function	find_flag)
{
	switch (find_flag) {
  		case HA_READ_KEY_EXACT:		return(PAGE_CUR_GE);
  			/* the above does not require the index to be UNIQUE */
  		case HA_READ_KEY_OR_NEXT:	return(PAGE_CUR_GE);
		case HA_READ_KEY_OR_PREV:	return(PAGE_CUR_LE);
		case HA_READ_AFTER_KEY:		return(PAGE_CUR_G);
		case HA_READ_BEFORE_KEY:	return(PAGE_CUR_L);
		case HA_READ_PREFIX:		return(PAGE_CUR_GE);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2944
	        case HA_READ_PREFIX_LAST:       return(PAGE_CUR_LE);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2945 2946 2947
                case HA_READ_PREFIX_LAST_OR_PREV:return(PAGE_CUR_LE);
		  /* In MySQL-4.0 HA_READ_PREFIX and HA_READ_PREFIX_LAST always
		  pass a complete-field prefix of a key value as the search
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2948 2949 2950 2951 2952
		  tuple. I.e., it is not allowed that the last field would
		  just contain n first bytes of the full field value.
		  MySQL uses a 'padding' trick to convert LIKE 'abc%'
		  type queries so that it can use as a search tuple
		  a complete-field-prefix of a key value. Thus, the InnoDB
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2953 2954 2955 2956 2957 2958 2959
		  search mode PAGE_CUR_LE_OR_EXTENDS is never used.
		  TODO: when/if MySQL starts to use also partial-field
		  prefixes, we have to deal with stripping of spaces
		  and comparison of non-latin1 char type fields in
		  innobase_mysql_cmp() to get PAGE_CUR_LE_OR_EXTENDS to
		  work correctly. */

2960 2961 2962 2963 2964
		default:			assert(0);
	}

	return(0);
}
2965

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014
/*
   BACKGROUND INFO: HOW A SELECT SQL QUERY IS EXECUTED
   ---------------------------------------------------
The following does not cover all the details, but explains how we determine
the start of a new SQL statement, and what is associated with it.

For each table in the database the MySQL interpreter may have several
table handle instances in use, also in a single SQL query. For each table
handle instance there is an InnoDB  'prebuilt' struct which contains most
of the InnoDB data associated with this table handle instance.

  A) if the user has not explicitly set any MySQL table level locks:

  1) MySQL calls ::external_lock to set an 'intention' table level lock on
the table of the handle instance. There we set
prebuilt->sql_stat_start = TRUE. The flag sql_stat_start should be set
true if we are taking this table handle instance to use in a new SQL
statement issued by the user. We also increment trx->n_mysql_tables_in_use.

  2) If prebuilt->sql_stat_start == TRUE we 'pre-compile' the MySQL search
instructions to prebuilt->template of the table handle instance in
::index_read. The template is used to save CPU time in large joins.

  3) In row_search_for_mysql, if prebuilt->sql_stat_start is true, we
allocate a new consistent read view for the trx if it does not yet have one,
or in the case of a locking read, set an InnoDB 'intention' table level
lock on the table.

  4) We do the SELECT. MySQL may repeatedly call ::index_read for the
same table handle instance, if it is a join.

  5) When the SELECT ends, MySQL removes its intention table level locks
in ::external_lock. When trx->n_mysql_tables_in_use drops to zero,
 (a) we execute a COMMIT there if the autocommit is on,
 (b) we also release possible 'SQL statement level resources' InnoDB may
have for this SQL statement. The MySQL interpreter does NOT execute
autocommit for pure read transactions, though it should. That is why the
table handler in that case has to execute the COMMIT in ::external_lock.

  B) If the user has explicitly set MySQL table level locks, then MySQL
does NOT call ::external_lock at the start of the statement. To determine
when we are at the start of a new SQL statement we at the start of
::index_read also compare the query id to the latest query id where the
table handle instance was used. If it has changed, we know we are at the
start of a new SQL statement. Since the query id can theoretically
wrap around, we use this test only as a secondary way of determining the
start of a new SQL statement. */


3015 3016 3017 3018 3019 3020 3021 3022 3023
/**************************************************************************
Positions an index cursor to the index specified in the handle. Fetches the
row if any. */

int
ha_innobase::index_read(
/*====================*/
					/* out: 0, HA_ERR_KEY_NOT_FOUND,
					or error number */
3024
	mysql_byte*		buf,	/* in/out: buffer for the returned
3025
					row */
3026
	const mysql_byte* 	key_ptr,/* in: key value; if this is NULL
3027
					we position the cursor at the
3028 3029 3030
					start or end of index; this can
					also contain an InnoDB row id, in
					which case key_len is the InnoDB
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3031 3032 3033 3034
					row id length; the key value can
					also be a prefix of a full key value,
					and the last column can be a prefix
					of a full column */
3035
	uint			key_len,/* in: key value length */
3036 3037 3038 3039 3040 3041 3042 3043 3044 3045
	enum ha_rkey_function find_flag)/* in: search flags from my_base.h */
{
	row_prebuilt_t* prebuilt	= (row_prebuilt_t*) innobase_prebuilt;
	ulint		mode;
	dict_index_t*	index;
	ulint		match_mode 	= 0;
	int 		error;
	ulint		ret;

  	DBUG_ENTER("index_read");
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3046

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3047 3048
	ut_ad(prebuilt->trx ==
		(trx_t*) current_thd->transaction.all.innobase_tid);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3049

3050
  	statistic_increment(ha_read_key_count, &LOCK_status);
3051

3052 3053 3054
	if (last_query_id != user_thd->query_id) {
	        prebuilt->sql_stat_start = TRUE;
                last_query_id = user_thd->query_id;
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
3055 3056

		innobase_release_stat_resources(prebuilt->trx);
3057 3058
	}

3059
	index = prebuilt->index;
3060

3061 3062
	/* Note that if the index for which the search template is built is not
        necessarily prebuilt->index, but can also be the clustered index */
3063

3064 3065 3066 3067
	if (prebuilt->sql_stat_start) {
		build_template(prebuilt, user_thd, table,
							ROW_MYSQL_REC_FIELDS);
	}
3068 3069

	if (key_ptr) {
3070 3071 3072
	        /* Convert the search key value to InnoDB format into
		prebuilt->search_tuple */

3073
		row_sel_convert_mysql_key_to_innobase(prebuilt->search_tuple,
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3074 3075 3076 3077
					(byte*) key_val_buff,
					(ulint)upd_and_key_val_buff_len,
					index,
					(byte*) key_ptr,
3078
					(ulint) key_len, prebuilt->trx);
3079 3080 3081 3082 3083 3084
	} else {
		/* We position the cursor to the last or the first entry
		in the index */

 		dtuple_set_n_fields(prebuilt->search_tuple, 0);
	}
3085

3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099
	mode = convert_search_mode_to_innobase(find_flag);

	match_mode = 0;

	if (find_flag == HA_READ_KEY_EXACT) {
		match_mode = ROW_SEL_EXACT;

	} else if (find_flag == HA_READ_PREFIX
				|| find_flag == HA_READ_PREFIX_LAST) {
		match_mode = ROW_SEL_EXACT_PREFIX;
	}

	last_match_mode = match_mode;

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3100
	innodb_srv_conc_enter_innodb(prebuilt->trx);
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
3101

3102
	ret = row_search_for_mysql((byte*) buf, mode, prebuilt, match_mode, 0);
3103

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3104
	innodb_srv_conc_exit_innodb(prebuilt->trx);
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
3105

3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117
	if (ret == DB_SUCCESS) {
		error = 0;
		table->status = 0;

	} else if (ret == DB_RECORD_NOT_FOUND) {
		error = HA_ERR_KEY_NOT_FOUND;
		table->status = STATUS_NOT_FOUND;

	} else if (ret == DB_END_OF_INDEX) {
		error = HA_ERR_KEY_NOT_FOUND;
		table->status = STATUS_NOT_FOUND;
	} else {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3118
		error = convert_error_code_to_mysql(ret, user_thd);
3119 3120
		table->status = STATUS_NOT_FOUND;
	}
3121

3122 3123 3124
	DBUG_RETURN(error);
}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3125 3126 3127
/***********************************************************************
The following functions works like index_read, but it find the last
row with the current key value or prefix. */
3128 3129

int
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3130 3131 3132 3133 3134 3135 3136 3137 3138
ha_innobase::index_read_last(
/*=========================*/
			           /* out: 0, HA_ERR_KEY_NOT_FOUND, or an
				   error code */
        mysql_byte*       buf,     /* out: fetched row */
        const mysql_byte* key_ptr, /* in: key value, or a prefix of a full
				   key value */
	uint              key_len) /* in: length of the key val or prefix
				   in bytes */
3139
{
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3140
        return(index_read(buf, key_ptr, key_len, HA_READ_PREFIX_LAST));
3141 3142
}

3143
/************************************************************************
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3144
Changes the active index of a handle. */
3145 3146 3147 3148

int
ha_innobase::change_active_index(
/*=============================*/
3149 3150 3151
			/* out: 0 or error code */
	uint 	keynr)	/* in: use this index; MAX_KEY means always clustered
			index, even if it was internally generated by
3152
			InnoDB */
3153
{
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3154 3155 3156 3157
	row_prebuilt_t* prebuilt	= (row_prebuilt_t*) innobase_prebuilt;
	KEY*		key=0;
	statistic_increment(ha_read_key_count, &LOCK_status);
	DBUG_ENTER("change_active_index");
3158

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3159 3160 3161
	ut_ad(user_thd == current_thd);
	ut_ad(prebuilt->trx ==
	     (trx_t*) current_thd->transaction.all.innobase_tid);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3162

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3163
	active_index = keynr;
3164

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3165 3166
	if (keynr != MAX_KEY && table->keys > 0) {
		key = table->key_info + active_index;
3167

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3168
		prebuilt->index = dict_table_get_index_noninline(
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
3169 3170
						     prebuilt->table,
						     key->name);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3171 3172
        } else {
		prebuilt->index = dict_table_get_first_index_noninline(
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
3173
							   prebuilt->table);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3174
	}
3175

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3176 3177 3178 3179 3180 3181
	if (!prebuilt->index) {
	       sql_print_error(
"Innodb could not find key n:o %u with name %s from dict cache for table %s",
	      keynr, key ? key->name : "NULL", prebuilt->table->name);
	      DBUG_RETURN(1);
	}
3182

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3183
	assert(prebuilt->search_tuple != 0);
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
3184

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3185
	dtuple_set_n_fields(prebuilt->search_tuple, prebuilt->index->n_fields);
3186

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3187
	dict_index_copy_types(prebuilt->search_tuple, prebuilt->index,
3188
			prebuilt->index->n_fields);
3189

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3190 3191 3192 3193 3194
	/* MySQL changes the active index for a handle also during some
	queries, for example SELECT MAX(a), SUM(a) first retrieves the MAX()
	and then calculates the sum. Previously we played safe and used
	the flag ROW_MYSQL_WHOLE_ROW below, but that caused unnecessary
	copying. Starting from MySQL-4.1 we use a more efficient flag here. */
3195

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3196
	build_template(prebuilt, user_thd, table, ROW_MYSQL_REC_FIELDS);
3197

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3198
	DBUG_RETURN(0);
3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209
}

/**************************************************************************
Positions an index cursor to the index specified in keynr. Fetches the
row if any. */
/* ??? This is only used to read whole keys ??? */

int
ha_innobase::index_read_idx(
/*========================*/
					/* out: error number or 0 */
3210
	mysql_byte*	buf,		/* in/out: buffer for the returned
3211 3212
					row */
	uint 		keynr,		/* in: use this index */
3213
	const mysql_byte* key,		/* in: key value; if this is NULL
3214 3215 3216 3217 3218
					we position the cursor at the
					start or end of index */
	uint		key_len,	/* in: key value length */
	enum ha_rkey_function find_flag)/* in: search flags from my_base.h */
{
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
3219 3220 3221 3222
	if (change_active_index(keynr)) {

		return(1);
	}
3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235

	return(index_read(buf, key, key_len, find_flag));
}

/***************************************************************************
Reads the next or previous row from a cursor, which must have previously been
positioned using index_read. */

int
ha_innobase::general_fetch(
/*=======================*/
				/* out: 0, HA_ERR_END_OF_FILE, or error
				number */
3236
	mysql_byte* 	buf,	/* in/out: buffer for next row in MySQL
3237 3238 3239 3240 3241 3242 3243 3244
				format */
	uint 	direction,	/* in: ROW_SEL_NEXT or ROW_SEL_PREV */
	uint	match_mode)	/* in: 0, ROW_SEL_EXACT, or
				ROW_SEL_EXACT_PREFIX */
{
	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;
	ulint		ret;
	int		error	= 0;
3245

3246
	DBUG_ENTER("general_fetch");
3247

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3248 3249
	ut_ad(prebuilt->trx ==
	     (trx_t*) current_thd->transaction.all.innobase_tid);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3250

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3251
	innodb_srv_conc_enter_innodb(prebuilt->trx);
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
3252

monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
3253 3254
	ret = row_search_for_mysql((byte*)buf, 0, prebuilt, match_mode,
								direction);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3255
	innodb_srv_conc_exit_innodb(prebuilt->trx);
3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268

	if (ret == DB_SUCCESS) {
		error = 0;
		table->status = 0;

	} else if (ret == DB_RECORD_NOT_FOUND) {
		error = HA_ERR_END_OF_FILE;
		table->status = STATUS_NOT_FOUND;

	} else if (ret == DB_END_OF_INDEX) {
		error = HA_ERR_END_OF_FILE;
		table->status = STATUS_NOT_FOUND;
	} else {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3269
		error = convert_error_code_to_mysql(ret, user_thd);
3270 3271
		table->status = STATUS_NOT_FOUND;
	}
3272

3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284
	DBUG_RETURN(error);
}

/***************************************************************************
Reads the next row from a cursor, which must have previously been
positioned using index_read. */

int
ha_innobase::index_next(
/*====================*/
				/* out: 0, HA_ERR_END_OF_FILE, or error
				number */
3285
	mysql_byte* 	buf)	/* in/out: buffer for next row in MySQL
3286 3287
				format */
{
3288 3289
  	statistic_increment(ha_read_next_count, &LOCK_status);

3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300
	return(general_fetch(buf, ROW_SEL_NEXT, 0));
}

/***********************************************************************
Reads the next row matching to the key value given as the parameter. */

int
ha_innobase::index_next_same(
/*=========================*/
				/* out: 0, HA_ERR_END_OF_FILE, or error
				number */
3301 3302
	mysql_byte* 	buf,	/* in/out: buffer for the row */
	const mysql_byte* key,	/* in: key value */
3303 3304
	uint 		keylen)	/* in: key value length */
{
3305
  	statistic_increment(ha_read_next_count, &LOCK_status);
3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318

	return(general_fetch(buf, ROW_SEL_NEXT, last_match_mode));
}

/***************************************************************************
Reads the previous row from a cursor, which must have previously been
positioned using index_read. */

int
ha_innobase::index_prev(
/*====================*/
				/* out: 0, HA_ERR_END_OF_FILE, or error
				number */
3319
	mysql_byte* 	buf)	/* in/out: buffer for previous row in MySQL
3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331
				format */
{
	return(general_fetch(buf, ROW_SEL_PREV, 0));
}

/************************************************************************
Positions a cursor on the first record in an index and reads the
corresponding row to buf. */

int
ha_innobase::index_first(
/*=====================*/
3332
				/* out: 0, HA_ERR_END_OF_FILE,
3333 3334
				or error code */
	mysql_byte*	buf)	/* in/out: buffer for the row */
3335 3336 3337 3338 3339 3340 3341 3342
{
	int	error;

  	DBUG_ENTER("index_first");
  	statistic_increment(ha_read_first_count, &LOCK_status);

  	error = index_read(buf, NULL, 0, HA_READ_AFTER_KEY);

3343 3344 3345 3346 3347 3348
        /* MySQL does not seem to allow this to return HA_ERR_KEY_NOT_FOUND */

  	if (error == HA_ERR_KEY_NOT_FOUND) {
  		error = HA_ERR_END_OF_FILE;
  	}

3349 3350 3351 3352 3353 3354 3355 3356 3357 3358
  	DBUG_RETURN(error);
}

/************************************************************************
Positions a cursor on the last record in an index and reads the
corresponding row to buf. */

int
ha_innobase::index_last(
/*====================*/
3359 3360
				/* out: 0, HA_ERR_END_OF_FILE, or error code */
	mysql_byte*	buf)	/* in/out: buffer for the row */
3361 3362 3363
{
	int	error;

3364
  	DBUG_ENTER("index_last");
3365
  	statistic_increment(ha_read_last_count, &LOCK_status);
3366 3367 3368

  	error = index_read(buf, NULL, 0, HA_READ_BEFORE_KEY);

3369
        /* MySQL does not seem to allow this to return HA_ERR_KEY_NOT_FOUND */
3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384

  	if (error == HA_ERR_KEY_NOT_FOUND) {
  		error = HA_ERR_END_OF_FILE;
  	}

  	DBUG_RETURN(error);
}

/********************************************************************
Initialize a table scan. */

int
ha_innobase::rnd_init(
/*==================*/
			/* out: 0 or error number */
3385
	bool	scan)	/* in: ???????? */
3386
{
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
3387
	int	err;
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
3388

3389
	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;
3390

3391 3392 3393
	/* Store the active index value so that we can restore the original
	value after a scan */

3394
	if (prebuilt->clust_index_was_generated) {
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
3395
		err = change_active_index(MAX_KEY);
3396
	} else {
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
3397
		err = change_active_index(primary_key);
3398
	}
3399

3400
  	start_of_scan = 1;
3401

monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
3402
 	return(err);
3403 3404 3405
}

/*********************************************************************
3406
Ends a table scan. */
3407 3408 3409 3410 3411 3412

int
ha_innobase::rnd_end(void)
/*======================*/
				/* out: 0 or error number */
{
3413
	return(index_end());
3414 3415 3416 3417 3418 3419 3420 3421 3422 3423
}

/*********************************************************************
Reads the next row in a table scan (also used to read the FIRST row
in a table scan). */

int
ha_innobase::rnd_next(
/*==================*/
			/* out: 0, HA_ERR_END_OF_FILE, or error number */
3424
	mysql_byte* buf)/* in/out: returns the row in this buffer,
3425 3426
			in MySQL format */
{
3427
	int	error;
3428 3429 3430 3431

  	DBUG_ENTER("rnd_next");
  	statistic_increment(ha_read_rnd_next_count, &LOCK_status);

3432
  	if (start_of_scan) {
3433 3434 3435 3436
		error = index_first(buf);
		if (error == HA_ERR_KEY_NOT_FOUND) {
			error = HA_ERR_END_OF_FILE;
		}
3437
		start_of_scan = 0;
3438
	} else {
3439
		error = general_fetch(buf, ROW_SEL_NEXT, 0);
3440
	}
3441

3442 3443 3444 3445
  	DBUG_RETURN(error);
}

/**************************************************************************
3446
Fetches a row from the table based on a row reference. */
3447

3448 3449 3450
int
ha_innobase::rnd_pos(
/*=================*/
3451 3452 3453
				/* out: 0, HA_ERR_KEY_NOT_FOUND,
				or error code */
	mysql_byte* 	buf,	/* in/out: buffer for the row */
3454 3455 3456 3457 3458
	mysql_byte*	pos)	/* in: primary key value of the row in the
				MySQL format, or the row id if the clustered
				index was internally generated by InnoDB;
				the length of data in pos has to be
				ref_length */
3459
{
3460 3461 3462
	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;
	int		error;
	uint		keynr	= active_index;
3463
	DBUG_ENTER("rnd_pos");
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
3464
	DBUG_DUMP("key", (char*) pos, ref_length);
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
3465

3466
	statistic_increment(ha_read_rnd_count, &LOCK_status);
3467

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3468 3469
	ut_ad(prebuilt->trx ==
		(trx_t*) current_thd->transaction.all.innobase_tid);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3470

3471 3472 3473 3474
	if (prebuilt->clust_index_was_generated) {
		/* No primary key was defined for the table and we
		generated the clustered index from the row id: the
		row reference is the row id, not any key value
3475
		that MySQL knows of */
3476

monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
3477
		error = change_active_index(MAX_KEY);
3478
	} else {
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
3479
		error = change_active_index(primary_key);
3480
	}
3481

monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
3482
	if (error) {
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
3483
	        DBUG_PRINT("error",("Got error: %ld",error));
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
3484 3485
		DBUG_RETURN(error);
	}
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
3486

3487 3488 3489 3490
	/* Note that we assume the length of the row reference is fixed
        for the table, and it is == ref_length */

	error = index_read(buf, pos, ref_length, HA_READ_KEY_EXACT);
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
3491 3492 3493 3494
	if (error)
	{
	  DBUG_PRINT("error",("Got error: %ld",error));
	}
3495
	change_active_index(keynr);
3496

3497 3498 3499 3500
  	DBUG_RETURN(error);
}

/*************************************************************************
3501
Stores a reference to the current row to 'ref' field of the handle. Note
3502 3503
that in the case where we have generated the clustered index for the
table, the function parameter is illogical: we MUST ASSUME that 'record'
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3504
is the current 'position' of the handle, because if row ref is actually
3505
the row id internally generated in InnoDB, then 'record' does not contain
3506 3507
it. We just guess that the row id must be for the record where the handle
was positioned the last time. */
3508 3509 3510 3511

void
ha_innobase::position(
/*==================*/
3512
	const mysql_byte*	record)	/* in: row in MySQL format */
3513
{
3514 3515
	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;
	uint		len;
3516

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3517 3518
	ut_ad(prebuilt->trx ==
		(trx_t*) current_thd->transaction.all.innobase_tid);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3519

3520 3521 3522 3523
	if (prebuilt->clust_index_was_generated) {
		/* No primary key was defined for the table and we
		generated the clustered index from row id: the
		row reference will be the row id, not any key value
3524
		that MySQL knows of */
3525 3526 3527 3528 3529

		len = DATA_ROW_ID_LEN;

		memcpy(ref, prebuilt->row_id, len);
	} else {
3530 3531
		len = store_key_val_for_row(primary_key, (char*)ref,
							 ref_length, record);
3532
	}
3533

3534 3535 3536
	/* Since we do not store len to the buffer 'ref', we must assume
	that len is always fixed for this table. The following assertion
	checks this. */
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
3537
  
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3538 3539 3540
	if (len != ref_length) {
	        fprintf(stderr,
	 "InnoDB: Error: stored ref len is %lu, but table ref len is %lu\n",
3541
		  (ulong)len, (ulong)ref_length);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3542
	}
3543 3544 3545
}

/*********************************************************************
3546
Creates a table definition to an InnoDB database. */
3547 3548 3549 3550
static
int
create_table_def(
/*=============*/
3551
	trx_t*		trx,		/* in: InnoDB transaction handle */
3552 3553
	TABLE*		form,		/* in: information on table
					columns and indexes */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3554 3555 3556 3557 3558 3559 3560 3561 3562
	const char*	table_name,	/* in: table name */
	const char*	path_of_temp_table)/* in: if this is a table explicitly
					created by the user with the
					TEMPORARY keyword, then this
					parameter is the dir path where the
					table should be placed if we create
					an .ibd file for it (no .ibd extension
					in the path, though); otherwise this
					is NULL */
3563 3564 3565 3566 3567 3568
{
	Field*		field;
	dict_table_t*	table;
	ulint		n_cols;
  	int 		error;
  	ulint		col_type;
3569 3570
  	ulint		nulls_allowed;
	ulint		unsigned_type;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3571
	ulint		binary_type;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3572
	ulint		charset_no;
3573
  	ulint		i;
3574

3575 3576 3577 3578 3579
  	DBUG_ENTER("create_table_def");
  	DBUG_PRINT("enter", ("table_name: %s", table_name));

	n_cols = form->fields;

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3580 3581
	/* We pass 0 as the space id, and determine at a lower level the space
	id where to store the table */
3582 3583

	table = dict_mem_table_create((char*) table_name, 0, n_cols);
3584

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3585 3586 3587 3588 3589
	if (path_of_temp_table) {
		table->dir_path_of_temp_table =
			mem_heap_strdup(table->heap, path_of_temp_table);
	}

3590 3591 3592
	for (i = 0; i < n_cols; i++) {
		field = form->field[i];

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3593 3594
		col_type = get_innobase_type_from_mysql_type(&unsigned_type,
								field);
3595 3596 3597 3598 3599 3600
		if (field->null_ptr) {
			nulls_allowed = 0;
		} else {
			nulls_allowed = DATA_NOT_NULL;
		}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3601
		if (field->binary()) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3602 3603 3604 3605 3606
			binary_type = DATA_BINARY_TYPE;
		} else {
			binary_type = 0;
		}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3607 3608 3609 3610 3611 3612 3613 3614 3615 3616
		charset_no = 0;	

		if (dtype_is_string_type(col_type)) {

			charset_no = (ulint)field->charset()->number;

			ut_a(charset_no < 256); /* in ut0type.h we assume that
						the number fits in one byte */
		}

3617
		dict_mem_table_add_col(table, (char*) field->field_name,
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3618 3619
					col_type, dtype_form_prtype( 
					(ulint)field->type()
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3620
					| nulls_allowed | unsigned_type
3621
					| binary_type,
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3622
					+ charset_no),
3623 3624 3625 3626 3627
					field->pack_length(), 0);
	}

	error = row_create_table_for_mysql(table, trx);

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3628
	error = convert_error_code_to_mysql(error, NULL);
3629 3630 3631 3632 3633

	DBUG_RETURN(error);
}

/*********************************************************************
3634
Creates an index in an InnoDB database. */
3635 3636
static
int
3637 3638
create_index(
/*=========*/
3639
	trx_t*		trx,		/* in: InnoDB transaction handle */
3640 3641 3642 3643 3644
	TABLE*		form,		/* in: information on table
					columns and indexes */
	const char*	table_name,	/* in: table name */
	uint		key_num)	/* in: index number */
{
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3645
	Field*		field;
3646
	dict_index_t*	index;
3647
  	int 		error;
3648 3649 3650 3651
	ulint		n_fields;
	KEY*		key;
	KEY_PART_INFO*	key_part;
	ulint		ind_type;
3652 3653
	ulint		col_type;
	ulint		prefix_len;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3654
	ulint		is_unsigned;
3655
  	ulint		i;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3656
  	ulint		j;
3657

3658
  	DBUG_ENTER("create_index");
3659

3660 3661 3662
	key = form->key_info + key_num;

    	n_fields = key->key_parts;
3663

3664 3665
    	ind_type = 0;

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3666
    	if (key_num == form->primary_key) {
3667 3668
		ind_type = ind_type | DICT_CLUSTERED;
	}
3669

3670 3671 3672 3673
	if (key->flags & HA_NOSAME ) {
		ind_type = ind_type | DICT_UNIQUE;
	}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3674 3675
	/* We pass 0 as the space id, and determine at a lower level the space
	id where to store the table */
3676 3677 3678 3679 3680 3681

	index = dict_mem_index_create((char*) table_name, key->name, 0,
						ind_type, n_fields);
	for (i = 0; i < n_fields; i++) {
		key_part = key->key_part + i;

3682
		/* (The flag HA_PART_KEY_SEG denotes in MySQL a column prefix
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3683 3684 3685 3686 3687 3688 3689 3690 3691 3692
		field in an index: we only store a specified number of first
		bytes of the column to the index field.) The flag does not
		seem to be properly set by MySQL. Let us fall back on testing
		the length of the key part versus the column. */
		
		field = NULL;
		for (j = 0; j < form->fields; j++) {

			field = form->field[j];

3693 3694 3695
			if (0 == innobase_strcasecmp(
					field->field_name,
					key_part->field->field_name)) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3696 3697 3698 3699 3700 3701 3702 3703
				/* Found the corresponding column */

				break;
			}
		}

		ut_a(j < form->fields);

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3704 3705
		col_type = get_innobase_type_from_mysql_type(
					&is_unsigned, key_part->field);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3706 3707 3708 3709

		if (DATA_BLOB == col_type
		    || key_part->length < field->pack_length()) {

3710 3711 3712 3713 3714 3715 3716 3717
		        prefix_len = key_part->length;

			if (col_type == DATA_INT
			    || col_type == DATA_FLOAT
			    || col_type == DATA_DOUBLE
			    || col_type == DATA_DECIMAL) {
			        fprintf(stderr,
"InnoDB: error: MySQL is trying to create a column prefix index field\n"
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3718 3719
"InnoDB: on an inappropriate data type. Table name %s, column name %s.\n",
				  table_name, key_part->field->field_name);
3720 3721 3722 3723 3724
			        
			        prefix_len = 0;
			}
		} else {
		        prefix_len = 0;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3725 3726
		}

3727 3728
		/* We assume all fields should be sorted in ascending
		order, hence the '0': */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3729

3730
		dict_mem_index_add_field(index,
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3731 3732
				(char*) key_part->field->field_name,
				0, prefix_len);
3733 3734 3735 3736
	}

	error = row_create_index_for_mysql(index, trx);

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3737
	error = convert_error_code_to_mysql(error, NULL);
3738 3739 3740 3741 3742

	DBUG_RETURN(error);
}

/*********************************************************************
3743
Creates an index to an InnoDB table when the user has defined no
3744
primary index. */
3745 3746
static
int
3747 3748
create_clustered_index_when_no_primary(
/*===================================*/
3749
	trx_t*		trx,		/* in: InnoDB transaction handle */
3750 3751 3752
	const char*	table_name)	/* in: table name */
{
	dict_index_t*	index;
3753 3754
  	int 		error;

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3755 3756
	/* We pass 0 as the space id, and determine at a lower level the space
	id where to store the table */
3757

monty@donna.mysql.fi's avatar
monty@donna.mysql.fi committed
3758 3759 3760
	index = dict_mem_index_create((char*) table_name,
				      (char*) "GEN_CLUST_INDEX",
				      0, DICT_CLUSTERED, 0);
3761 3762
	error = row_create_index_for_mysql(index, trx);

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3763
	error = convert_error_code_to_mysql(error, NULL);
3764

3765
	return(error);
3766 3767 3768
}

/*********************************************************************
3769
Creates a new table to an InnoDB database. */
3770 3771 3772 3773 3774 3775 3776 3777

int
ha_innobase::create(
/*================*/
					/* out: error number */
	const char*	name,		/* in: table name */
	TABLE*		form,		/* in: information on table
					columns and indexes */
3778 3779 3780
	HA_CREATE_INFO*	create_info)	/* in: more information of the
					created table, contains also the
					create statement string */
3781 3782 3783
{
	int		error;
	dict_table_t*	innobase_table;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3784
	trx_t*		parent_trx;
3785
	trx_t*		trx;
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
3786
	int		primary_key_no;
3787
	uint		i;
3788 3789
	char		name2[FN_REFLEN];
	char		norm_name[FN_REFLEN];
monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
3790
	THD		*thd= current_thd;
jan@hundin.mysql.fi's avatar
jan@hundin.mysql.fi committed
3791
	ib_longlong     auto_inc_value;
3792

3793 3794
  	DBUG_ENTER("ha_innobase::create");

monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
3795
	DBUG_ASSERT(thd != NULL);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3796

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3797 3798 3799 3800
	if (form->fields > 1000) {
		/* The limit probably should be REC_MAX_N_FIELDS - 3 = 1020,
		but we play safe here */

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3801
	        DBUG_RETURN(HA_ERR_TO_BIG_ROW);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3802 3803
	} 

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3804 3805 3806
	/* Get the transaction associated with the current thd, or create one
	if not yet created */
	
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3807
	parent_trx = check_trx_exists(current_thd);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3808 3809 3810 3811 3812 3813

	/* In case MySQL calls this in the middle of a SELECT query, release
	possible adaptive hash latch to avoid deadlocks of threads */

	trx_search_latch_release_if_reserved(parent_trx);	
	
3814
	trx = trx_allocate_for_mysql();
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3815 3816 3817
		
	trx->mysql_thd = thd;
	trx->mysql_query_str = &((*thd).query);
3818

monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
3819
	if (thd->options & OPTION_NO_FOREIGN_KEY_CHECKS) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3820 3821 3822
		trx->check_foreigns = FALSE;
	}

monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
3823
	if (thd->options & OPTION_RELAXED_UNIQUE_CHECKS) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3824 3825 3826
		trx->check_unique_secondary = FALSE;
	}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3827 3828 3829 3830 3831
	if (lower_case_table_names) {
		srv_lower_case_table_names = TRUE;
	} else {
		srv_lower_case_table_names = FALSE;
	}
monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
3832

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3833
	fn_format(name2, name, "", "", 2);	// Remove the .frm extension
3834 3835

	normalize_table_name(norm_name, name2);
3836

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3837
	/* Latch the InnoDB data dictionary exclusively so that no deadlocks
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3838
	or lock waits can happen in it during a table create operation.
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3839
	Drop table etc. do this latching in row0mysql.c. */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3840

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3841
	row_mysql_lock_data_dictionary(trx);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3842 3843

	/* Create the table definition in InnoDB */
3844

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3845 3846 3847 3848 3849 3850 3851
	if (create_info->options & HA_LEX_CREATE_TMP_TABLE) {

  		error = create_table_def(trx, form, norm_name, name2);
	} else {
		error = create_table_def(trx, form, norm_name, NULL);
	}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3852
  	if (error) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3853
		innobase_commit_low(trx);
3854

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3855
		row_mysql_unlock_data_dictionary(trx);
3856 3857 3858 3859 3860 3861

  		trx_free_for_mysql(trx);

 		DBUG_RETURN(error);
 	}

3862 3863
	/* Look for a primary key */

monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
3864 3865 3866
	primary_key_no= (table->primary_key != MAX_KEY ?
			 (int) table->primary_key : 
			 -1);
3867

3868 3869 3870
	/* Our function row_get_mysql_key_number_for_index assumes
	the primary key is always number 0, if it exists */

monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
3871
	DBUG_ASSERT(primary_key_no == -1 || primary_key_no == 0);
3872

3873 3874
	/* Create the keys */

3875 3876 3877
	if (form->keys == 0 || primary_key_no == -1) {
		/* Create an index which is used as the clustered index;
		order the rows by their row id which is internally generated
3878
		by InnoDB */
3879

3880
		error = create_clustered_index_when_no_primary(trx,
3881
							norm_name);
3882
  		if (error) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3883 3884
			innobase_commit_low(trx);

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3885
			row_mysql_unlock_data_dictionary(trx);
3886

3887 3888 3889 3890
			trx_free_for_mysql(trx);

			DBUG_RETURN(error);
      		}
3891 3892 3893
	}

	if (primary_key_no != -1) {
3894
		/* In InnoDB the clustered index must always be created
3895
		first */
monty@donna.mysql.fi's avatar
monty@donna.mysql.fi committed
3896 3897
	    	if ((error = create_index(trx, form, norm_name,
					  (uint) primary_key_no))) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3898 3899
			innobase_commit_low(trx);

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3900
			row_mysql_unlock_data_dictionary(trx);
3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911

  			trx_free_for_mysql(trx);

			DBUG_RETURN(error);
      		}
      	}

	for (i = 0; i < form->keys; i++) {

		if (i != (uint) primary_key_no) {

monty@donna.mysql.fi's avatar
monty@donna.mysql.fi committed
3912
    			if ((error = create_index(trx, form, norm_name, i))) {
3913

monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
3914
			  	innobase_commit_low(trx);
3915

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3916
				row_mysql_unlock_data_dictionary(trx);
3917 3918 3919 3920 3921

  				trx_free_for_mysql(trx);

				DBUG_RETURN(error);
      			}
3922
      		}
3923
  	}
3924

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3925
	if (current_thd->query != NULL) {
marko@hundin.mysql.fi's avatar
marko@hundin.mysql.fi committed
3926
		LEX_STRING q;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3927

marko@hundin.mysql.fi's avatar
marko@hundin.mysql.fi committed
3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938
		if (thd->convert_string(&q, system_charset_info,
					current_thd->query,
					current_thd->query_length,
					current_thd->charset())) {
			error = HA_ERR_OUT_OF_MEM;
		} else {
			error = row_table_add_foreign_constraints(trx,
					q.str, norm_name);

			error = convert_error_code_to_mysql(error, NULL);
		}
3939

3940 3941
		if (error) {
			innobase_commit_low(trx);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3942

3943
			row_mysql_unlock_data_dictionary(trx);
3944

3945
  			trx_free_for_mysql(trx);
3946

3947 3948
			DBUG_RETURN(error);
		}
3949 3950
	}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3951 3952
  	innobase_commit_low(trx);

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3953
	row_mysql_unlock_data_dictionary(trx);
3954

monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
3955 3956 3957
	/* Flush the log to reduce probability that the .frm files and
	the InnoDB data dictionary get out-of-sync if the user runs
	with innodb_flush_log_at_trx_commit = 0 */
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
3958

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3959
	log_buffer_flush_to_disk();
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
3960

3961
	innobase_table = dict_table_get(norm_name, NULL);
3962

monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
3963
	DBUG_ASSERT(innobase_table != 0);
3964

3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978
	if ((create_info->used_fields & HA_CREATE_USED_AUTO) &&
	   (create_info->auto_increment_value != 0)) {

		/* Query was ALTER TABLE...AUTO_INCREMENT = x; or 
		CREATE TABLE ...AUTO_INCREMENT = x; Find out a table
		definition from the dictionary and get the current value
		of the auto increment field. Set a new value to the
		auto increment field if the value is greater than the
		maximum value in the column. */

		auto_inc_value = create_info->auto_increment_value;
		dict_table_autoinc_initialize(innobase_table, auto_inc_value);
	}

3979
	/* Tell the InnoDB server that there might be work for
3980 3981 3982 3983 3984 3985 3986 3987 3988
	utility threads: */

	srv_active_wake_master_thread();

  	trx_free_for_mysql(trx);

	DBUG_RETURN(0);
}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3989 3990 3991 3992 3993 3994 3995 3996 3997 3998
/*********************************************************************
Discards the tablespace of the table behind this handle, or imports one
for it, depending on the flag given. */

int
ha_innobase::discard_or_import_tablespace(
/*======================================*/
				/* out: result of the InnoDB operation
				after it has been passed through
				convert_error_code_to_mysql() */
	my_bool discard)	/* in: TRUE if discard, else import */
{
	row_prebuilt_t*	pb = (row_prebuilt_t*) innobase_prebuilt;
	dict_table_t*	ib_table;
	trx_t*		handle_trx;
	int		ib_result;
	int		mysql_result;

	DBUG_ENTER("ha_innobase::discard_or_import_tablespace");

	/* The handle must own a valid trx object, and it must be the
	same trx that is registered in the current thd */

	ut_a(pb->trx && pb->trx->magic_n == TRX_MAGIC_N);
	ut_a(pb->trx ==
		(trx_t*) current_thd->transaction.all.innobase_tid);

	handle_trx = pb->trx;
	ib_table = pb->table;

	ib_result = discard
		? row_discard_tablespace_for_mysql(ib_table->name, handle_trx)
		: row_import_tablespace_for_mysql(ib_table->name, handle_trx);

	/* Translate the InnoDB code to the one MySQL expects */

	mysql_result = convert_error_code_to_mysql(ib_result, NULL);

	DBUG_RETURN(mysql_result);
}

4023
/*********************************************************************
4024
Drops a table from an InnoDB database. Before calling this function,
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
4025 4026
MySQL calls innobase_commit to commit the transaction of the current user.
Then the current user cannot have locks set on the table. Drop table
4027 4028
operation inside InnoDB will remove all locks any user has on the table
inside InnoDB. */
4029 4030 4031 4032

int
ha_innobase::delete_table(
/*======================*/
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
4033 4034
				/* out: error number */
	const char*	name)	/* in: table name */
4035 4036 4037
{
	ulint	name_len;
	int	error;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4038
	trx_t*	parent_trx;
4039
	trx_t*	trx;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4040
	THD     *thd= current_thd;
4041
	char	norm_name[1000];
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4042

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4043
 	DBUG_ENTER("ha_innobase::delete_table");
4044

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4045 4046 4047
	/* Get the transaction associated with the current thd, or create one
	if not yet created */
	
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4048
	parent_trx = check_trx_exists(current_thd);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4049 4050 4051 4052 4053 4054

	/* In case MySQL calls this in the middle of a SELECT query, release
	possible adaptive hash latch to avoid deadlocks of threads */

	trx_search_latch_release_if_reserved(parent_trx);	

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4055 4056 4057 4058 4059 4060
	if (lower_case_table_names) {
		srv_lower_case_table_names = TRUE;
	} else {
		srv_lower_case_table_names = FALSE;
	}

4061 4062
	trx = trx_allocate_for_mysql();

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4063 4064
	trx->mysql_thd = current_thd;
	trx->mysql_query_str = &((*current_thd).query);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4065

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4066 4067 4068 4069 4070 4071 4072 4073
	if (thd->options & OPTION_NO_FOREIGN_KEY_CHECKS) {
		trx->check_foreigns = FALSE;
	}

	if (thd->options & OPTION_RELAXED_UNIQUE_CHECKS) {
		trx->check_unique_secondary = FALSE;
	}

4074 4075 4076
	name_len = strlen(name);

	assert(name_len < 1000);
4077

4078 4079
	/* Strangely, MySQL passes the table name without the '.frm'
	extension, in contrast to ::create */
4080

4081 4082
	normalize_table_name(norm_name, name);

4083
  	/* Drop the table in InnoDB */
4084

4085
	error = row_drop_table_for_mysql(norm_name, trx,
monty@mishka.local's avatar
monty@mishka.local committed
4086
		thd->lex->sql_command == SQLCOM_DROP_DB);
4087

monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
4088 4089 4090
	/* Flush the log to reduce probability that the .frm files and
	the InnoDB data dictionary get out-of-sync if the user runs
	with innodb_flush_log_at_trx_commit = 0 */
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
4091

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4092
	log_buffer_flush_to_disk();
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
4093

4094
	/* Tell the InnoDB server that there might be work for
4095 4096 4097 4098
	utility threads: */

	srv_active_wake_master_thread();

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4099
  	innobase_commit_low(trx);
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
4100

4101 4102
  	trx_free_for_mysql(trx);

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4103
	error = convert_error_code_to_mysql(error, NULL);
4104 4105 4106 4107

	DBUG_RETURN(error);
}

4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120
/*********************************************************************
Removes all tables in the named database inside InnoDB. */

int
innobase_drop_database(
/*===================*/
			/* out: error number */
	char*	path)	/* in: database path; inside InnoDB the name
			of the last directory in the path is used as
			the database name: for example, in 'mysql/data/test'
			the database name is 'test' */
{
	ulint	len		= 0;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4121
	trx_t*	parent_trx;
4122 4123 4124
	trx_t*	trx;
	char*	ptr;
	int	error;
4125
	char*	namebuf;
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
4126

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4127 4128 4129
	/* Get the transaction associated with the current thd, or create one
	if not yet created */
	
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4130
	parent_trx = check_trx_exists(current_thd);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4131 4132 4133 4134 4135 4136

	/* In case MySQL calls this in the middle of a SELECT query, release
	possible adaptive hash latch to avoid deadlocks of threads */

	trx_search_latch_release_if_reserved(parent_trx);	

4137
	ptr = strend(path) - 2;
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
4138

4139 4140 4141 4142 4143 4144
	while (ptr >= path && *ptr != '\\' && *ptr != '/') {
		ptr--;
		len++;
	}

	ptr++;
4145
	namebuf = my_malloc(len + 2, MYF(0));
4146 4147 4148 4149

	memcpy(namebuf, ptr, len);
	namebuf[len] = '/';
	namebuf[len + 1] = '\0';
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4150
#ifdef  __WIN__
4151
	innobase_casedn_str(namebuf);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4152
#endif
4153
	trx = trx_allocate_for_mysql();
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4154 4155
	trx->mysql_thd = current_thd;
	trx->mysql_query_str = &((*current_thd).query);
4156

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4157 4158 4159 4160
	if (current_thd->options & OPTION_NO_FOREIGN_KEY_CHECKS) {
		trx->check_foreigns = FALSE;
	}

4161
  	error = row_drop_database_for_mysql(namebuf, trx);
4162
	my_free(namebuf, MYF(0));
4163

monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
4164 4165 4166
	/* Flush the log to reduce probability that the .frm files and
	the InnoDB data dictionary get out-of-sync if the user runs
	with innodb_flush_log_at_trx_commit = 0 */
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
4167

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4168
	log_buffer_flush_to_disk();
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
4169

4170 4171 4172 4173 4174
	/* Tell the InnoDB server that there might be work for
	utility threads: */

	srv_active_wake_master_thread();

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4175
  	innobase_commit_low(trx);
4176 4177
  	trx_free_for_mysql(trx);

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4178
	error = convert_error_code_to_mysql(error, NULL);
4179 4180 4181 4182

	return(error);
}

4183
/*************************************************************************
4184
Renames an InnoDB table. */
4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195

int
ha_innobase::rename_table(
/*======================*/
				/* out: 0 or error code */
	const char*	from,	/* in: old name of the table */
	const char*	to)	/* in: new name of the table */
{
	ulint	name_len1;
	ulint	name_len2;
	int	error;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4196
	trx_t*	parent_trx;
4197
	trx_t*	trx;
4198 4199
	char	norm_from[1000];
	char	norm_to[1000];
4200

4201 4202
  	DBUG_ENTER("ha_innobase::rename_table");

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4203 4204 4205
	/* Get the transaction associated with the current thd, or create one
	if not yet created */
	
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4206
	parent_trx = check_trx_exists(current_thd);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4207 4208 4209 4210 4211 4212

	/* In case MySQL calls this in the middle of a SELECT query, release
	possible adaptive hash latch to avoid deadlocks of threads */

	trx_search_latch_release_if_reserved(parent_trx);	

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4213 4214 4215 4216 4217 4218
	if (lower_case_table_names) {
		srv_lower_case_table_names = TRUE;
	} else {
		srv_lower_case_table_names = FALSE;
	}

4219
	trx = trx_allocate_for_mysql();
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4220 4221
	trx->mysql_thd = current_thd;
	trx->mysql_query_str = &((*current_thd).query);
4222

4223 4224 4225 4226
	if (current_thd->options & OPTION_NO_FOREIGN_KEY_CHECKS) {
		trx->check_foreigns = FALSE;
	}

4227 4228 4229 4230 4231
	name_len1 = strlen(from);
	name_len2 = strlen(to);

	assert(name_len1 < 1000);
	assert(name_len2 < 1000);
4232

4233 4234 4235
	normalize_table_name(norm_from, from);
	normalize_table_name(norm_to, to);

4236
  	/* Rename the table in InnoDB */
4237

4238
  	error = row_rename_table_for_mysql(norm_from, norm_to, trx);
4239

monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
4240 4241 4242
	/* Flush the log to reduce probability that the .frm files and
	the InnoDB data dictionary get out-of-sync if the user runs
	with innodb_flush_log_at_trx_commit = 0 */
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
4243

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4244
	log_buffer_flush_to_disk();
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
4245

4246
	/* Tell the InnoDB server that there might be work for
4247 4248 4249 4250
	utility threads: */

	srv_active_wake_master_thread();

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4251
  	innobase_commit_low(trx);
4252 4253
  	trx_free_for_mysql(trx);

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4254
	error = convert_error_code_to_mysql(error, NULL);
4255 4256 4257 4258 4259 4260 4261 4262 4263 4264

	DBUG_RETURN(error);
}

/*************************************************************************
Estimates the number of index records in a range. */

ha_rows
ha_innobase::records_in_range(
/*==========================*/
						/* out: estimated number of
						rows; a zero estimate is
						reported as 1. HA_POS_ERROR
						if the temporary key buffer
						cannot be allocated */
	uint 			keynr,		/* in: index number */
        key_range		*min_key,	/* in: start key value of the
                                                   range, may also be 0 */
	key_range		*max_key)	/* in: range end key val, may
                                                   also be 0 */
{
	row_prebuilt_t* prebuilt	= (row_prebuilt_t*) innobase_prebuilt;
	KEY*		key;
	dict_index_t*	index;
	/* Size of the scratch buffer for the range end key; computed once
	so that the allocation and the length passed on always agree */
	ulint		buff2_len = table->reclength
      						+ table->max_key_length + 100;
	mysql_byte*	key_val_buff2;
	dtuple_t*	range_start;
	dtuple_t*	range_end;
	ib_longlong	n_rows;
	ulint		mode1;
	ulint		mode2;
	void*           heap1;
	void*           heap2;

   	DBUG_ENTER("records_in_range");

	key_val_buff2 = (mysql_byte*) my_malloc(buff2_len, MYF(MY_WME));

	if (key_val_buff2 == NULL) {
		/* Out of memory: report an error to the caller instead of
		handing a NULL buffer to the key conversion below. Nothing
		else has been set up yet, so there is nothing to undo. */

		DBUG_RETURN(HA_POS_ERROR);
	}

	prebuilt->trx->op_info = (char*)"estimating records in index range";

	/* In case MySQL calls this in the middle of a SELECT query, release
	possible adaptive hash latch to avoid deadlocks of threads */

	trx_search_latch_release_if_reserved(prebuilt->trx);

	active_index = keynr;

	key = table->key_info + active_index;

	index = dict_table_get_index_noninline(prebuilt->table, key->name);

	/* Build one search tuple for each end of the range; each tuple
	lives in its own heap, which is freed below */

	range_start = dtuple_create_for_mysql(&heap1, key->key_parts);
 	dict_index_copy_types(range_start, index, key->key_parts);

	range_end = dtuple_create_for_mysql(&heap2, key->key_parts);
 	dict_index_copy_types(range_end, index, key->key_parts);

	/* The range start is converted using the key value buffer of the
	handle, the range end using the buffer allocated above. A missing
	key is passed on as a NULL pointer of length 0. */

	row_sel_convert_mysql_key_to_innobase(
				range_start, (byte*) key_val_buff,
				(ulint)upd_and_key_val_buff_len,
				index,
				(byte*) (min_key ? min_key->key :
                                         (const mysql_byte*) 0),
				(ulint) (min_key ? min_key->length : 0),
				prebuilt->trx);

	row_sel_convert_mysql_key_to_innobase(
				range_end, (byte*) key_val_buff2,
				buff2_len, index,
				(byte*) (max_key ? max_key->key :
                                         (const mysql_byte*) 0),
				(ulint) (max_key ? max_key->length : 0),
				prebuilt->trx);

	mode1 = convert_search_mode_to_innobase(min_key ? min_key->flag :
                                                HA_READ_KEY_EXACT);
	mode2 = convert_search_mode_to_innobase(max_key ? max_key->flag :
                                                HA_READ_KEY_EXACT);

	n_rows = btr_estimate_n_rows_in_range(index, range_start,
						mode1, range_end, mode2);
	dtuple_free_for_mysql(heap1);
	dtuple_free_for_mysql(heap2);

    	my_free((char*) key_val_buff2, MYF(0));

	prebuilt->trx->op_info = (char*)"";

	/* The MySQL optimizer seems to believe an estimate of 0 rows is
	always accurate and may return the result 'Empty set' based on that.
	The accuracy is not guaranteed, and even if it were, for a locking
	read we should anyway perform the search to set the next-key lock.
	Add 1 to the value to make sure MySQL does not make the assumption! */

	if (n_rows == 0) {
	        n_rows = 1;
	}

	DBUG_RETURN((ha_rows) n_rows);
}

4355 4356
/*************************************************************************
Gives an UPPER BOUND to the number of rows in a table. This is used in
4357
filesort.cc. */
4358 4359

ha_rows
sergefp@mysql.com's avatar
sergefp@mysql.com committed
4360
ha_innobase::estimate_rows_upper_bound(void)
4361
/*======================================*/
4362
			/* out: upper bound of rows */
4363 4364
{
	row_prebuilt_t* prebuilt	= (row_prebuilt_t*) innobase_prebuilt;
4365 4366
	dict_index_t*	index;
	ulonglong	estimate;
4367
	ulonglong	local_data_file_length;
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
4368

sergefp@mysql.com's avatar
sergefp@mysql.com committed
4369
 	DBUG_ENTER("estimate_rows_upper_bound");
4370

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4371 4372 4373 4374 4375 4376
	/* We do not know if MySQL can call this function before calling
	external_lock(). To be safe, update the thd of the current table
	handle. */

	update_thd(current_thd);

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4377 4378 4379
	prebuilt->trx->op_info = (char*)
	                         "calculating upper bound for table rows";

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4380 4381 4382 4383
	/* In case MySQL calls this in the middle of a SELECT query, release
	possible adaptive hash latch to avoid deadlocks of threads */

	trx_search_latch_release_if_reserved(prebuilt->trx);
4384

4385
	index = dict_table_get_first_index_noninline(prebuilt->table);
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
4386

4387
	local_data_file_length = ((ulonglong) index->stat_n_leaf_pages)
4388
    							* UNIV_PAGE_SIZE;
4389

heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
4390 4391
	/* Calculate a minimum length for a clustered index record and from
	that an upper bound for the number of rows. Since we only calculate
4392 4393
	new statistics in row0mysql.c when a table has grown by a threshold
	factor, we must add a safety factor 2 in front of the formula below. */
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
4394

4395 4396
	estimate = 2 * local_data_file_length /
					 dict_index_calc_min_rec_len(index);
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
4397

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4398 4399
	prebuilt->trx->op_info = (char*)"";

monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
4400
	DBUG_RETURN((ha_rows) estimate);
4401 4402
}

4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414
/*************************************************************************
How many seeks it will take to read through the table. This is to be
comparable to the number returned by records_in_range so that we can
decide if we should scan the table or use keys. */

double
ha_innobase::scan_time()
/*====================*/
			/* out: estimated time measured in disk seeks */
{
	row_prebuilt_t* prebuilt	= (row_prebuilt_t*) innobase_prebuilt;

heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
4415 4416 4417 4418 4419 4420
	/* Since MySQL seems to favor table scans too much over index
	searches, we pretend that a sequential read takes the same time
	as a random disk read, that is, we do not divide the following
	by 10, which would be physically realistic. */
	
	return((double) (prebuilt->table->stat_clustered_index_size));
4421 4422
}

4423 4424 4425
/**********************************************************************
Calculate the time it takes to read a set of ranges through an index
This enables us to optimise reads for clustered indexes. */
monty@narttu.mysql.fi's avatar
monty@narttu.mysql.fi committed
4426

4427 4428 4429 4430 4431 4432 4433
double
ha_innobase::read_time(
/*===================*/
			/* out: estimated time measured in disk seeks */
	uint    index,	/* in: key number */
	uint	ranges,	/* in: how many ranges */
	ha_rows rows)	/* in: estimated number of rows in the ranges */
monty@narttu.mysql.fi's avatar
monty@narttu.mysql.fi committed
4434
{
4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446
	ha_rows total_rows;
	double  time_for_scan;
  
	if (index != table->primary_key)
	  return handler::read_time(index, ranges, rows); // Not clustered

	if (rows <= 2)
	  return (double) rows;

	/* Assume that the read time is proportional to the scan time for all
	rows + at most one seek per range. */

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4447
	time_for_scan = scan_time();
4448

sergefp@mysql.com's avatar
sergefp@mysql.com committed
4449
	if ((total_rows = estimate_rows_upper_bound()) < rows)
4450 4451 4452
	  return time_for_scan;

	return (ranges + (double) rows / (double) total_rows * time_for_scan);
monty@narttu.mysql.fi's avatar
monty@narttu.mysql.fi committed
4453 4454
}

4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466
/*************************************************************************
Returns statistics information of the table to the MySQL interpreter,
in various fields of the handle object. */

void
ha_innobase::info(
/*==============*/
	uint flag)	/* in: what information MySQL requests */
{
	row_prebuilt_t* prebuilt	= (row_prebuilt_t*) innobase_prebuilt;
	dict_table_t*	ib_table;
	dict_index_t*	index;
4467
	ha_rows		rec_per_key;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4468
	ib_longlong	n_rows;
4469 4470
	ulong		j;
	ulong		i;
4471 4472
	char		path[FN_REFLEN];
	os_file_stat_t  stat_info;
4473

4474 4475
 	DBUG_ENTER("info");

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4476 4477 4478 4479 4480 4481
        /* If we are forcing recovery at a high level, we will suppress
	statistics calculation on tables, because that may crash the
	server if an index is badly corrupted. */

        if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE) {

4482
                DBUG_VOID_RETURN;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4483 4484
        }

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4485 4486 4487 4488 4489 4490 4491 4492 4493
	/* We do not know if MySQL can call this function before calling
	external_lock(). To be safe, update the thd of the current table
	handle. */

	update_thd(current_thd);

	/* In case MySQL calls this in the middle of a SELECT query, release
	possible adaptive hash latch to avoid deadlocks of threads */

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4494 4495
	prebuilt->trx->op_info = (char*)"returning various info to MySQL";

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4496
	trx_search_latch_release_if_reserved(prebuilt->trx);
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
4497

4498 4499 4500 4501 4502 4503
 	ib_table = prebuilt->table;

 	if (flag & HA_STATUS_TIME) {
 		/* In sql_show we call with this flag: update then statistics
 		so that they are up-to-date */

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4504 4505
	        prebuilt->trx->op_info = (char*)"updating table statistics";

4506
 		dict_update_statistics(ib_table);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4507 4508 4509

		prebuilt->trx->op_info = (char*)
		                          "returning various info to MySQL";
4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523

		if (ib_table->space != 0) {
			my_snprintf(path, sizeof(path), "%s/%s%s",
				    mysql_data_home, ib_table->name,
				    ".ibd");
			unpack_filename(path,path);
		} else {
			my_snprintf(path, sizeof(path), "%s/%s%s", 
				    mysql_data_home, ib_table->name,
				    reg_ext);
		
			unpack_filename(path,path);
		}

4524 4525 4526
		/* Note that we do not know the access time of the table, 
		nor the CHECK TABLE time, nor the UPDATE or INSERT time. */

4527 4528 4529
		if (os_file_get_status(path,&stat_info)) {
			create_time = stat_info.ctime;
		}
4530 4531 4532
 	}

	if (flag & HA_STATUS_VARIABLE) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556
		n_rows = ib_table->stat_n_rows;

		/* Because we do not protect stat_n_rows by any mutex in a
		delete, it is theoretically possible that the value can be
		smaller than zero! TODO: fix this race.

		The MySQL optimizer seems to assume in a left join that n_rows
		is an accurate estimate if it is zero. Of course, it is not,
		since we do not have any locks on the rows yet at this phase.
		Since SHOW TABLE STATUS seems to call this function with the
		HA_STATUS_TIME flag set, while the left join optimizer does not
		set that flag, we add one to a zero value if the flag is not
		set. That way SHOW TABLE STATUS will show the best estimate,
		while the optimizer never sees the table empty. */

		if (n_rows < 0) {
			n_rows = 0;
		}

		if (n_rows == 0 && !(flag & HA_STATUS_TIME)) {
			n_rows++;
		}

    		records = (ha_rows)n_rows;
4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569
    		deleted = 0;
    		data_file_length = ((ulonglong)
				ib_table->stat_clustered_index_size)
    					* UNIV_PAGE_SIZE;
    		index_file_length = ((ulonglong)
				ib_table->stat_sum_of_other_index_sizes)
    					* UNIV_PAGE_SIZE;
    		delete_length = 0;
    		check_time = 0;

    		if (records == 0) {
    			mean_rec_length = 0;
    		} else {
4570
    			mean_rec_length = (ulong) (data_file_length / records);
4571 4572 4573 4574 4575 4576 4577 4578 4579
    		}
    	}

	if (flag & HA_STATUS_CONST) {
		index = dict_table_get_first_index_noninline(ib_table);

		if (prebuilt->clust_index_was_generated) {
			index = dict_table_get_next_index_noninline(index);
		}
4580

4581
		for (i = 0; i < table->keys; i++) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 4592
			if (index == NULL) {
				ut_print_timestamp(stderr);
			        fprintf(stderr,
"  InnoDB: Error: table %s contains less indexes inside InnoDB\n"
"InnoDB: than are defined in the MySQL .frm file. Have you mixed up\n"
"InnoDB: .frm files from different installations? See section\n"
"InnoDB: 15.1 at http://www.innodb.com/ibman.html\n",
				   ib_table->name);
				break;
			}

4593 4594
			for (j = 0; j < table->key_info[i].key_parts; j++) {

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4595 4596 4597 4598 4599 4600 4601 4602
				if (j + 1 > index->n_uniq) {
				        ut_print_timestamp(stderr);
			                fprintf(stderr,
"  InnoDB: Error: index %s of %s has %lu columns unique inside InnoDB\n"
"InnoDB: but MySQL is asking statistics for %lu columns. Have you mixed up\n"
"InnoDB: .frm files from different installations? See section\n"
"InnoDB: 15.1 at http://www.innodb.com/ibman.html\n",
						index->name,
4603 4604
						ib_table->name,
						(unsigned long) index->n_uniq,
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4605 4606 4607 4608
						j + 1);
				        break;
				}

4609 4610 4611 4612
				if (index->stat_n_diff_key_vals[j + 1] == 0) {

					rec_per_key = records;
				} else {
4613
					rec_per_key = (ha_rows)(records /
4614 4615 4616
   				         index->stat_n_diff_key_vals[j + 1]);
				}

heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
4617 4618 4619 4620 4621 4622 4623
				/* Since MySQL seems to favor table scans
				too much over index searches, we pretend
				index selectivity is 2 times better than
				our estimate: */

				rec_per_key = rec_per_key / 2;

4624 4625 4626
				if (rec_per_key == 0) {
					rec_per_key = 1;
				}
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
4627

4628 4629 4630
 				table->key_info[i].rec_per_key[j]=
				  rec_per_key >= ~(ulong) 0 ? ~(ulong) 0 :
				  rec_per_key;
4631
			}
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
4632

4633
			index = dict_table_get_next_index_noninline(index);
4634 4635
		}
	}
4636 4637

  	if (flag & HA_STATUS_ERRKEY) {
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
4638 4639
		ut_a(prebuilt->trx && prebuilt->trx->magic_n == TRX_MAGIC_N);

4640
		errkey = (unsigned int) row_get_mysql_key_number_for_index(
4641 4642
				       (dict_index_t*)
				       trx_get_error_info(prebuilt->trx));
4643 4644
  	}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4645 4646
	prebuilt->trx->op_info = (char*)"";

4647 4648 4649
  	DBUG_VOID_RETURN;
}

4650
/**************************************************************************
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4651 4652
Updates index cardinalities of the table, based on 8 random dives into
each index tree. This does NOT calculate exact statistics on the table. */
4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 4664 4665 4666

int
ha_innobase::analyze(
/*=================*/			 
					/* out: returns always 0 (success) */
	THD*		thd,		/* in: connection thread handle */
	HA_CHECK_OPT*	check_opt)	/* in: currently ignored */
{
	/* Simply call ::info() with all the flags */
	info(HA_STATUS_TIME | HA_STATUS_CONST | HA_STATUS_VARIABLE);

	return(0);
}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4667
/**************************************************************************
4668 4669
This is mapped to "ALTER TABLE tablename TYPE=InnoDB", which rebuilds
the table in MySQL. */
4670

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4671 4672 4673 4674 4675
int
ha_innobase::optimize(
/*==================*/
	THD*		thd,		/* in: connection thread handle */
	HA_CHECK_OPT*	check_opt)	/* in: currently ignored */
4676
{
4677
        return(HA_ADMIN_TRY_ALTER);
4678 4679
}

4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 4693 4694 4695
/***********************************************************************
Tries to check that an InnoDB table is not corrupted. If corruption is
noticed, prints to stderr information about it. In case of corruption
may also assert a failure and crash the server. */

int
ha_innobase::check(
/*===============*/
					/* out: HA_ADMIN_OK if
					row_check_table_for_mysql() reports
					DB_SUCCESS, else HA_ADMIN_CORRUPT */
	THD* 		thd,		/* in: user thread handle; not read
					here, current_thd is used instead */
	HA_CHECK_OPT* 	check_opt)	/* in: check options, currently
					ignored */
{
	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;
	ulint		check_result;

	/* The handle must carry a valid trx, and it must be the trx of
	the calling thread */
	ut_a(prebuilt->trx && prebuilt->trx->magic_n == TRX_MAGIC_N);
	ut_a(prebuilt->trx ==
		(trx_t*) current_thd->transaction.all.innobase_tid);

	if (!prebuilt->mysql_template) {
		/* No template yet: build a dummy whole-row template for
		the index scans done while checking */

		build_template(prebuilt, NULL, table, ROW_MYSQL_WHOLE_ROW);
	}

	check_result = row_check_table_for_mysql(prebuilt);

	return(check_result == DB_SUCCESS ? HA_ADMIN_OK : HA_ADMIN_CORRUPT);
}

4717
/*****************************************************************
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
4718 4719 4720
Adds information about free space in the InnoDB tablespace to a table comment
which is printed out when a user calls SHOW TABLE STATUS. Adds also info on
foreign keys. */
4721 4722 4723 4724

char*
ha_innobase::update_table_comment(
/*==============================*/
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
4725 4726 4727
				/* out: table comment + InnoDB free space +
				info on foreign keys */
        const char*	comment)/* in: table comment defined by user */
4728
{
4729 4730 4731
	uint	length			= strlen(comment);
	char*				str;
	row_prebuilt_t*	prebuilt	= (row_prebuilt_t*)innobase_prebuilt;
4732

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4733 4734 4735 4736
	/* We do not know if MySQL can call this function before calling
	external_lock(). To be safe, update the thd of the current table
	handle. */

4737 4738 4739 4740
	if(length > 64000 - 3) {
		return((char*)comment); /* string too long */
	}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4741 4742
	update_thd(current_thd);

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4743 4744
	prebuilt->trx->op_info = (char*)"returning table comment";

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4745 4746 4747 4748
	/* In case MySQL calls this in the middle of a SELECT query, release
	possible adaptive hash latch to avoid deadlocks of threads */

	trx_search_latch_release_if_reserved(prebuilt->trx);
4749
	str = NULL;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4750

4751
	if (FILE* file = os_file_create_tmpfile()) {
4752
		long	flen;
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
4753

4754 4755
		/* output the data to a temporary file */
		fprintf(file, "InnoDB free: %lu kB",
monty@mishka.local's avatar
monty@mishka.local committed
4756 4757 4758
      		   (ulong) fsp_get_available_space_in_free_extents(
      					prebuilt->table->space));

4759 4760
		dict_print_info_on_foreign_keys(FALSE, file,
				prebuilt->trx, prebuilt->table);
4761
		flen = ftell(file);
4762 4763 4764
		if (flen < 0) {
			flen = 0;
		} else if (length + flen + 3 > 64000) {
4765 4766
			flen = 64000 - 3 - length;
		}
4767

4768 4769
		/* allocate buffer for the full string, and
		read the contents of the temporary file */
4770

4771
		str = my_malloc(length + flen + 3, MYF(0));
4772

4773 4774 4775 4776 4777 4778 4779 4780 4781 4782 4783 4784 4785
		if (str) {
			char* pos	= str + length;
			if(length) {
				memcpy(str, comment, length);
				*pos++ = ';';
				*pos++ = ' ';
			}
			rewind(file);
			flen = fread(pos, 1, flen, file);
			pos[flen] = 0;
		}

		fclose(file);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4786
	}
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
4787

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4788 4789
        prebuilt->trx->op_info = (char*)"";

4790
  	return(str ? str : (char*) comment);
4791 4792
}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4793 4794 4795 4796 4797 4798 4799 4800 4801 4802 4803
/***********************************************************************
Gets the foreign key create info for a table stored in InnoDB. */

char*
ha_innobase::get_foreign_key_create_info(void)
/*==========================================*/
			/* out, own: character string in the form which
			can be inserted to the CREATE TABLE statement,
			MUST be freed with ::free_foreign_key_create_info */
{
	row_prebuilt_t* prebuilt = (row_prebuilt_t*)innobase_prebuilt;
4804
	char*	str	= 0;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4805

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4806
	ut_a(prebuilt != NULL);
4807

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4808 4809 4810 4811 4812 4813
	/* We do not know if MySQL can call this function before calling
	external_lock(). To be safe, update the thd of the current table
	handle. */

	update_thd(current_thd);

4814
	if (FILE* file = os_file_create_tmpfile()) {
4815
		long	flen;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4816

4817
		prebuilt->trx->op_info = (char*)"getting info on foreign keys";
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4818

4819 4820 4821
		/* In case MySQL calls this in the middle of a SELECT query,
		release possible adaptive hash latch to avoid
		deadlocks of threads */
4822

4823
		trx_search_latch_release_if_reserved(prebuilt->trx);
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
4824

4825
		/* output the data to a temporary file */
4826 4827
		dict_print_info_on_foreign_keys(TRUE, file,
				prebuilt->trx, prebuilt->table);
4828 4829 4830
		prebuilt->trx->op_info = (char*)"";

		flen = ftell(file);
4831 4832 4833
		if (flen < 0) {
			flen = 0;
		} else if(flen > 64000 - 1) {
4834 4835 4836 4837 4838 4839 4840 4841 4842 4843 4844 4845 4846 4847 4848 4849 4850
			flen = 64000 - 1;
		}

		/* allocate buffer for the string, and
		read the contents of the temporary file */

		str = my_malloc(flen + 1, MYF(0));

		if (str) {
			rewind(file);
			flen = fread(str, 1, flen, file);
			str[flen] = 0;
		}

		fclose(file);
	} else {
		/* unable to create temporary file */
monty@mishka.local's avatar
monty@mishka.local committed
4851
          	str = my_malloc(1, MYF(MY_ZEROFILL));
4852
	}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4853

monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
4854
  	return(str);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4855 4856
}

4857 4858 4859 4860 4861 4862 4863 4864 4865 4866 4867 4868 4869 4870 4871 4872 4873 4874 4875 4876 4877 4878 4879 4880 4881 4882
/*********************************************************************
Checks if ALTER TABLE may change the storage engine of the table.
Changing storage engines is not allowed for tables for which there
are foreign key constraints (parent or child tables). The two constraint
lists of the table are inspected while holding the data dictionary lock. */

bool
ha_innobase::can_switch_engines(void)
/*=================================*/
			/* out: true if both the referenced_list and the
			foreign_list of the table are empty */
{
	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;
	bool		has_constraints;

	DBUG_ENTER("ha_innobase::can_switch_engines");

	prebuilt->trx->op_info =
			"determining if there are foreign key constraints";

	row_mysql_lock_data_dictionary(prebuilt->trx);

	has_constraints =
		UT_LIST_GET_FIRST(prebuilt->table->referenced_list)
		|| UT_LIST_GET_FIRST(prebuilt->table->foreign_list);

	row_mysql_unlock_data_dictionary(prebuilt->trx);

	prebuilt->trx->op_info = "";

	DBUG_RETURN(!has_constraints);
}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4883 4884 4885 4886 4887 4888 4889 4890 4891 4892 4893 4894 4895 4896 4897 4898 4899 4900 4901 4902
/***********************************************************************
Checks if a table is referenced by a foreign key. The MySQL manual states that
a REPLACE is either equivalent to an INSERT, or DELETE(s) + INSERT. Only a
delete is then allowed internally to resolve a duplicate key conflict in
REPLACE, not an update. */

uint
ha_innobase::referenced_by_foreign_key(void)
/*========================================*/
			/* out: 1 if dict_table_referenced_by_foreign_key()
			reports a reference to the table, else 0 */
{
	row_prebuilt_t*	prebuilt = (row_prebuilt_t*)innobase_prebuilt;

	return(dict_table_referenced_by_foreign_key(prebuilt->table)
		? 1 : 0);
}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4903 4904 4905 4906 4907 4908 4909 4910 4911 4912 4913

/***********************************************************************
Frees the foreign key create info for a table stored in InnoDB, if it is
non-NULL. */

void
ha_innobase::free_foreign_key_create_info(
/*======================================*/
	char*	str)	/* in, own: create info string to free  */
{
	if (str) {
4914
		my_free(str, MYF(0));
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4915
	}
4916 4917
}

heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
4918 4919 4920 4921 4922 4923 4924 4925
/***********************************************************************
Tells something additional to the handler about how to do things. */

int
ha_innobase::extra(
/*===============*/
			   /* out: 0 or error number */
	enum ha_extra_function operation)
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4926 4927
                           /* in: HA_EXTRA_RETRIEVE_ALL_COLS or some
			   other flag */
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
4928 4929 4930 4931 4932 4933 4934 4935
{
	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;

	/* Warning: since it is not sure that MySQL calls external_lock
	before calling this function, the trx field in prebuilt can be
	obsolete! */

	switch (operation) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4936 4937 4938 4939 4940 4941 4942 4943 4944 4945 4946 4947
                case HA_EXTRA_FLUSH:
                        if (prebuilt->blob_heap) {
                                row_mysql_prebuilt_free_blob_heap(prebuilt);
                        }
                        break;
                case HA_EXTRA_RESET:
                        if (prebuilt->blob_heap) {
                                row_mysql_prebuilt_free_blob_heap(prebuilt);
                        }
                        prebuilt->read_just_key = 0;
                        break;
  		case HA_EXTRA_RESET_STATE:
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
4948
	        	prebuilt->read_just_key = 0;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4949
    	        	break;
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
4950 4951 4952
		case HA_EXTRA_NO_KEYREAD:
    			prebuilt->read_just_key = 0;
    			break;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4953
	        case HA_EXTRA_RETRIEVE_ALL_COLS:
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4954 4955 4956 4957 4958 4959 4960 4961
			prebuilt->hint_need_to_fetch_extra_cols
					= ROW_RETRIEVE_ALL_COLS;
			break;
	        case HA_EXTRA_RETRIEVE_PRIMARY_KEY:
			if (prebuilt->hint_need_to_fetch_extra_cols == 0) {
				prebuilt->hint_need_to_fetch_extra_cols
					= ROW_RETRIEVE_PRIMARY_KEY;
			}
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
4962 4963 4964 4965 4966 4967 4968 4969 4970 4971 4972
			break;
	        case HA_EXTRA_KEYREAD:
	        	prebuilt->read_just_key = 1;
	        	break;
		default:/* Do nothing */
			;
	}

	return(0);
}

4973
/**********************************************************************
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4974 4975 4976 4977 4978
MySQL calls this function at the start of each SQL statement inside LOCK
TABLES. Inside LOCK TABLES the ::external_lock method does not work to
mark SQL statement borders. Note also a special case: if a temporary table
is created inside LOCK TABLES, MySQL has not called external_lock() at all
on that table. */
4979 4980

int
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4981 4982
ha_innobase::start_stmt(
/*====================*/
4983 4984 4985 4986 4987 4988 4989 4990 4991 4992
	              /* out: 0 or error code */
	THD*    thd)  /* in: handle to the user thread */
{
	row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt;
	trx_t*		trx;

	update_thd(thd);

	trx = prebuilt->trx;

4993 4994 4995 4996 4997 4998 4999
	/* Here we release the search latch and the InnoDB thread FIFO ticket
	if they were reserved. They should have been released already at the
	end of the previous statement, but because inside LOCK TABLES the
	lock count method does not work to mark the end of a SELECT statement,
	that may not be the case. We MUST release the search latch before an
	INSERT, for example. */

5000 5001
	innobase_release_stat_resources(trx);

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5002 5003 5004 5005 5006 5007 5008 5009
	if (trx->isolation_level <= TRX_ISO_READ_COMMITTED
	    						&& trx->read_view) {
	    	/* At low transaction isolation levels we let
		each consistent read set its own snapshot */

	    	read_view_close_for_mysql(trx);
	}

5010 5011
	auto_inc_counter_for_this_stat = 0;
	prebuilt->sql_stat_start = TRUE;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5012
	prebuilt->hint_need_to_fetch_extra_cols = 0;
5013
	prebuilt->read_just_key = 0;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5014

5015
	if (!prebuilt->mysql_has_locked) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5016 5017 5018 5019 5020 5021
	        /* This handle is for a temporary table created inside
	        this same LOCK TABLES; since MySQL does NOT call external_lock
	        in this case, we must use x-row locks inside InnoDB to be
	        prepared for an update of a row */
	  
	        prebuilt->select_lock_type = LOCK_X;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5022 5023 5024 5025
	} else {
		if (trx->isolation_level != TRX_ISO_SERIALIZABLE
		    && thd->lex->sql_command == SQLCOM_SELECT
		    && thd->lex->lock_option == TL_READ) {
5026
	
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5027 5028 5029 5030 5031 5032 5033 5034 5035 5036 5037 5038 5039 5040 5041 5042 5043 5044 5045 5046 5047 5048 5049 5050 5051 5052 5053 5054 5055 5056
			/* For other than temporary tables, we obtain
			no lock for consistent read (plain SELECT). */

			prebuilt->select_lock_type = LOCK_NONE;
		} else {
			/* Not a consistent read: restore the
			select_lock_type value. The value of
			stored_select_lock_type was decided in:
			1) ::store_lock(),
			2) ::external_lock(), and
			3) ::init_table_handle_for_HANDLER(). */

			prebuilt->select_lock_type =
				prebuilt->stored_select_lock_type;
		}

		if (prebuilt->stored_select_lock_type != LOCK_S
		    && prebuilt->stored_select_lock_type != LOCK_X) {
			fprintf(stderr,
"InnoDB: Error: stored_select_lock_type is %lu inside ::start_stmt()!\n",
			prebuilt->stored_select_lock_type);

			/* Set the value to LOCK_X: this is just fault
			tolerance, we do not know what the correct value
			should be! */

			prebuilt->select_lock_type = LOCK_X;
		}
	}

5057
	/* Set the MySQL flag to mark that there is an active transaction */
5058
	thd->transaction.all.innodb_active_trans = 1;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5059 5060

	return(0);
5061 5062
}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5063 5064 5065 5066 5067 5068 5069 5070 5071 5072 5073
/**********************************************************************
Maps a MySQL trx isolation level code to the InnoDB isolation level code.
An unrecognized code trips the ut_a(0) assertion. */
inline
ulint
innobase_map_isolation_level(
/*=========================*/
					/* out: InnoDB isolation level */
	enum_tx_isolation	iso)	/* in: MySQL isolation level code */
{
	if (iso == ISO_REPEATABLE_READ) {
		return(TRX_ISO_REPEATABLE_READ);
	}

	if (iso == ISO_READ_COMMITTED) {
		return(TRX_ISO_READ_COMMITTED);
	}

	if (iso == ISO_SERIALIZABLE) {
		return(TRX_ISO_SERIALIZABLE);
	}

	if (iso == ISO_READ_UNCOMMITTED) {
		return(TRX_ISO_READ_UNCOMMITTED);
	}

	/* Not a known MySQL isolation level */
	ut_a(0);

	return(0);
}
	
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
5081 5082
/**********************************************************************
As MySQL will execute an external lock for every new table it uses when it
5083 5084 5085
starts to process an SQL statement (an exception is when MySQL calls
start_stmt for the handle) we can use this function to store the pointer to
the THD in the handle. We will also use this function to communicate
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
5086 5087 5088 5089 5090 5091 5092
to InnoDB that a new SQL statement has started and that we must store a
savepoint to our transaction handle, so that we are able to roll back
the SQL statement in case of an error. */

int
ha_innobase::external_lock(
/*=======================*/
5093
			        /* out: 0 */
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
5094 5095 5096 5097 5098 5099 5100
	THD*	thd,		/* in: handle to the user thread */
	int 	lock_type)	/* in: lock type */
{
	row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt;
	trx_t*		trx;

  	DBUG_ENTER("ha_innobase::external_lock");
5101
	DBUG_PRINT("enter",("lock_type: %d", lock_type));
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
5102 5103 5104 5105 5106 5107

	update_thd(thd);

	trx = prebuilt->trx;

	prebuilt->sql_stat_start = TRUE;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5108
	prebuilt->hint_need_to_fetch_extra_cols = 0;
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
5109 5110 5111 5112 5113 5114 5115 5116

	prebuilt->read_just_key = 0;

	if (lock_type == F_WRLCK) {

		/* If this is a SELECT, then it is in UPDATE TABLE ...
		or SELECT ... FOR UPDATE */
		prebuilt->select_lock_type = LOCK_X;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5117
		prebuilt->stored_select_lock_type = LOCK_X;
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
5118 5119 5120
	}

	if (lock_type != F_UNLCK) {
5121
		/* MySQL is setting a new table lock */
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
5122

5123 5124
		/* Set the MySQL flag to mark that there is an active
		transaction */
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
5125
		thd->transaction.all.innodb_active_trans = 1;
5126

heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
5127
		trx->n_mysql_tables_in_use++;
5128
		prebuilt->mysql_has_locked = TRUE;
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
5129

5130 5131
		if (trx->n_mysql_tables_in_use == 1) {
		        trx->isolation_level = innobase_map_isolation_level(
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5132 5133
						(enum_tx_isolation)
						thd->variables.tx_isolation);
5134
		}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5135 5136

		if (trx->isolation_level == TRX_ISO_SERIALIZABLE
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5137 5138
		    && prebuilt->select_lock_type == LOCK_NONE
		    && (thd->options
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5139
				& (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5140

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5141 5142
			/* To get serializable execution, we let InnoDB
			conceptually add 'LOCK IN SHARE MODE' to all SELECTs
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5143 5144 5145 5146 5147
			which otherwise would have been consistent reads. An
			exception is consistent reads in the AUTOCOMMIT=1 mode:
			we know that they are read-only transactions, and they
			can be serialized also if performed as consistent
			reads. */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5148 5149 5150 5151

			prebuilt->select_lock_type = LOCK_S;
		}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5152 5153 5154 5155 5156 5157
		/* Starting from 4.1.9, no InnoDB table lock is taken in LOCK
		TABLES if AUTOCOMMIT=1. It does not make much sense to acquire
		an InnoDB table lock if it is released immediately at the end
		of LOCK TABLES, and InnoDB's table locks in that case cause
		VERY easily deadlocks. */

heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
5158
		if (prebuilt->select_lock_type != LOCK_NONE) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5159

5160
			if (thd->in_lock_tables &&
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5161 5162
			    thd->variables.innodb_table_locks &&
			    (thd->options & OPTION_NOT_AUTOCOMMIT)) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5163

5164
				ulint	error;
5165 5166
				error = row_lock_table_for_mysql(prebuilt,
							NULL, LOCK_TABLE_EXP);
5167 5168 5169 5170 5171 5172 5173

				if (error != DB_SUCCESS) {
					error = convert_error_code_to_mysql(
						error, user_thd);
					DBUG_RETURN(error);
				}
			}
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
5174 5175 5176 5177

		  	trx->mysql_n_tables_locked++;
		}

5178
		DBUG_RETURN(0);
5179
	}
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
5180

5181
	/* MySQL is releasing a table lock */
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
5182

5183 5184 5185
	trx->n_mysql_tables_in_use--;
	prebuilt->mysql_has_locked = FALSE;
	auto_inc_counter_for_this_stat = 0;
5186 5187
	if (trx->n_lock_table_exp) {
		row_unlock_tables_for_mysql(trx);
5188
	}
5189

5190 5191
	/* If the MySQL lock count drops to zero we know that the current SQL
	statement has ended */
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
5192

5193
	if (trx->n_mysql_tables_in_use == 0) {
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
5194

5195 5196 5197
	        trx->mysql_n_tables_locked = 0;
		prebuilt->used_in_HANDLER = FALSE;
			
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5198 5199 5200 5201
		/* Release a possible FIFO ticket and search latch. Since we
		may reserve the kernel mutex, we have to release the search
		system latch first to obey the latching order. */

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5202
		innobase_release_stat_resources(trx);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5203

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5204
		if (!(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) {
5205 5206 5207 5208
			if (thd->transaction.all.innodb_active_trans != 0) {
		    	        innobase_commit(thd, trx);
			}
		} else {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5209 5210 5211
			if (trx->isolation_level <= TRX_ISO_READ_COMMITTED
	    						&& trx->read_view) {

5212
				/* At low transaction isolation levels we let
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5213 5214
				each consistent read set its own snapshot */

5215
				read_view_close_for_mysql(trx);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5216
			}
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
5217 5218 5219
		}
	}

5220
	DBUG_RETURN(0);
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
5221 5222
}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5223
/****************************************************************************
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5224
Implements the SHOW INNODB STATUS command. Sends the output of the InnoDB
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5225 5226 5227 5228 5229 5230 5231
Monitor to the client. */

int
innodb_show_status(
/*===============*/
	THD*	thd)	/* in: the MySQL query thread of the caller */
{
5232 5233 5234 5235 5236 5237
	Protocol*		protocol = thd->protocol;
	trx_t*			trx;
	static const char	truncated_msg[] = "... truncated...\n";
	const long		MAX_STATUS_SIZE = 64000;
	ulint			trx_list_start = ULINT_UNDEFINED;
	ulint			trx_list_end = ULINT_UNDEFINED;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5238

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5239 5240
        DBUG_ENTER("innodb_show_status");

5241
        if (have_innodb != SHOW_OPTION_YES) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5242 5243 5244
                my_message(ER_NOT_SUPPORTED_YET,
          "Cannot call SHOW INNODB STATUS because skip-innodb is defined",
                           MYF(0));
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5245 5246
                DBUG_RETURN(-1);
        }
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5247

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5248 5249 5250 5251
	trx = check_trx_exists(thd);

	innobase_release_stat_resources(trx);

5252 5253
	/* We let the InnoDB Monitor to output at most MAX_STATUS_SIZE
	bytes of text. */
5254

5255
	long	flen, usable_len;
5256
	char*	str;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5257

5258
	mutex_enter_noninline(&srv_monitor_file_mutex);
5259
	rewind(srv_monitor_file);
5260 5261
	srv_printf_innodb_monitor(srv_monitor_file,
				&trx_list_start, &trx_list_end);
5262
	flen = ftell(srv_monitor_file);
5263
	os_file_set_eof(srv_monitor_file);
5264 5265
	if (flen < 0) {
		flen = 0;
5266 5267 5268 5269 5270 5271
	}

	if (flen > MAX_STATUS_SIZE) {
		usable_len = MAX_STATUS_SIZE;
	} else {
		usable_len = flen;
5272
	}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5273

5274 5275
	/* allocate buffer for the string, and
	read the contents of the temporary file */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5276

5277
	if (!(str = my_malloc(usable_len + 1, MYF(0))))
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5278
        {
monty@mishka.local's avatar
monty@mishka.local committed
5279 5280
          mutex_exit_noninline(&srv_monitor_file_mutex);
          DBUG_RETURN(-1);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5281
        }
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5282

monty@mishka.local's avatar
monty@mishka.local committed
5283
	rewind(srv_monitor_file);
5284 5285 5286 5287 5288 5289 5290 5291 5292 5293 5294 5295 5296 5297 5298 5299 5300 5301 5302
	if (flen < MAX_STATUS_SIZE) {
		/* Display the entire output. */
		flen = fread(str, 1, flen, srv_monitor_file);
	} else if (trx_list_end < (ulint) flen
			&& trx_list_start < trx_list_end
			&& trx_list_start + (flen - trx_list_end)
			< MAX_STATUS_SIZE - sizeof truncated_msg - 1) {
		/* Omit the beginning of the list of active transactions. */
		long	len = fread(str, 1, trx_list_start, srv_monitor_file);
		memcpy(str + len, truncated_msg, sizeof truncated_msg - 1);
		len += sizeof truncated_msg - 1;
		usable_len = (MAX_STATUS_SIZE - 1) - len;
		fseek(srv_monitor_file, flen - usable_len, SEEK_SET);
		len += fread(str + len, 1, usable_len, srv_monitor_file);
		flen = len;
	} else {
		/* Omit the end of the output. */
		flen = fread(str, 1, MAX_STATUS_SIZE - 1, srv_monitor_file);
	}
5303

5304
	mutex_exit_noninline(&srv_monitor_file_mutex);
5305

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5306 5307
	List<Item> field_list;

5308
	field_list.push_back(new Item_empty_string("Status", flen));
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5309

monty@mishka.local's avatar
monty@mishka.local committed
5310
	if (protocol->send_fields(&field_list, 1)) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5311

5312
		my_free(str, MYF(0));
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5313

5314
		DBUG_RETURN(-1);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5315 5316
	}

monty@mishka.local's avatar
monty@mishka.local committed
5317 5318 5319
        protocol->prepare_for_resend();
        protocol->store(str, flen, system_charset_info);
        my_free(str, MYF(0));
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5320 5321 5322 5323

        if (protocol->write())
          DBUG_RETURN(-1);

monty@mishka.local's avatar
monty@mishka.local committed
5324
	send_eof(thd);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5325 5326 5327
  	DBUG_RETURN(0);
}

5328 5329 5330 5331 5332
/****************************************************************************
 Handling the shared INNOBASE_SHARE structure that is needed to provide table
 locking.
****************************************************************************/

5333
/* Returns the hash key of a share: a pointer to its table name, with the
name length stored in *length. The third argument is not used.
NOTE(review): presumably registered as the key-extraction callback of the
innobase_open_tables hash; the registration is not visible here. */
static mysql_byte* innobase_get_key(INNOBASE_SHARE *share,uint *length,
			      my_bool not_used __attribute__((unused)))
{
  mysql_byte *key= (mysql_byte*) share->table_name;

  *length= share->table_name_length;
  return key;
}

/* Looks up the INNOBASE_SHARE for table_name in innobase_open_tables and
creates and registers a new one if none exists yet. On success the use
count of the share is incremented and the share is returned. Returns 0 if
the share could not be allocated or could not be inserted into the hash.
The whole operation runs under innobase_mutex. */
static INNOBASE_SHARE *get_share(const char *table_name)
{
  INNOBASE_SHARE *share;
  pthread_mutex_lock(&innobase_mutex);
  uint length=(uint) strlen(table_name);
  if (!(share=(INNOBASE_SHARE*) hash_search(&innobase_open_tables,
					(mysql_byte*) table_name,
					    length)))
  {
    /* The share and its table name are allocated as one zero-filled
    chunk; the name is stored right after the struct. */
    if (!(share=(INNOBASE_SHARE *) my_malloc(sizeof(*share)+length+1,
				       MYF(MY_WME | MY_ZEROFILL))))
    {
      /* Allocation failed: bail out here instead of falling through to
      the use_count increment below, which would dereference NULL. */
      pthread_mutex_unlock(&innobase_mutex);
      return 0;
    }
    share->table_name_length=length;
    share->table_name=(char*) (share+1);
    strmov(share->table_name,table_name);
    if (my_hash_insert(&innobase_open_tables, (mysql_byte*) share))
    {
      pthread_mutex_unlock(&innobase_mutex);
      my_free((gptr) share, MYF(0));
      return 0;
    }
    thr_lock_init(&share->lock);
    pthread_mutex_init(&share->mutex,MY_MUTEX_INIT_FAST);
  }
  share->use_count++;
  pthread_mutex_unlock(&innobase_mutex);
  return share;
}

/* Drops one reference to a share. When the use count reaches zero the
share is removed from innobase_open_tables, its table lock and mutex are
destroyed, and its memory is freed. Runs under innobase_mutex. */
static void free_share(INNOBASE_SHARE *share)
{
  pthread_mutex_lock(&innobase_mutex);

  share->use_count--;

  if (share->use_count == 0)
  {
    /* That was the last user: tear the share down. */
    hash_delete(&innobase_open_tables, (mysql_byte*) share);
    thr_lock_delete(&share->lock);
    pthread_mutex_destroy(&share->mutex);
    my_free((gptr) share, MYF(0));
  }

  pthread_mutex_unlock(&innobase_mutex);
}
5382 5383

/*********************************************************************
Converts a MySQL table lock stored in the 'lock' field of the handle to
a proper type before storing pointer to the lock into an array of pointers.
MySQL also calls this if it wants to reset some table locks to a not-locked
state during the processing of an SQL query. An example is that during a
SELECT the read lock is released early on the 'const' tables where we only
fetch one row. MySQL does not call this when it releases all locks at the
end of an SQL statement. */

THR_LOCK_DATA**
ha_innobase::store_lock(
/*====================*/
						/* out: pointer to the next
						element in the 'to' array */
	THD*			thd,		/* in: user thread handle */
	THR_LOCK_DATA**		to,		/* in: pointer to an array
						of pointers to lock structs;
						pointer to the 'lock' field
						of current handle is stored
						next to this array */
	enum thr_lock_type 	lock_type)	/* in: lock type to store in
						'lock' */
{
	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;
	const bool	in_lock_tables = thd->in_lock_tables;

	/* InnoDB does a locking read in the following cases, in this order:
	1) MySQL is doing LOCK TABLES ... READ LOCAL, or
	2) (we do not know when TL_READ_HIGH_PRIORITY is used), or
	3) this is a SELECT ... IN SHARE MODE, or
	4) we are doing a complex SQL statement like
	INSERT INTO ... SELECT ... and the logical logging (MySQL
	binlog) requires the use of a locking read, or
	MySQL is doing LOCK TABLES ... READ.
	5) we let InnoDB do locking reads for all SQL statements that
	are not simple SELECTs; note that select_lock_type in this
	case may get strengthened in ::external_lock() to LOCK_X. */

	const bool	locking_read =
		(in_lock_tables
		 && (lock_type == TL_READ
		     || lock_type == TL_READ_HIGH_PRIORITY))
		|| lock_type == TL_READ_WITH_SHARED_LOCKS
		|| lock_type == TL_READ_NO_INSERT
		|| (thd->lex->sql_command != SQLCOM_SELECT
		    && lock_type != TL_IGNORE);

	if (locking_read) {
		/* In case we have innobase_locks_unsafe_for_binlog
		option set and isolation level of the transaction
		is not set to serializable and MySQL is doing
		INSERT INTO...SELECT or UPDATE ... = (SELECT ...)
		without FOR UPDATE or IN SHARE MODE in select, then
		we use consistent read for select. */

		bool	relaxed_read = false;

		if (srv_locks_unsafe_for_binlog
		    && prebuilt->trx->isolation_level
		       != TRX_ISO_SERIALIZABLE
		    && (lock_type == TL_READ
			|| lock_type == TL_READ_NO_INSERT)) {

			switch (thd->lex->sql_command) {
			case SQLCOM_SELECT:
			case SQLCOM_UPDATE_MULTI:
			case SQLCOM_DELETE_MULTI:
			case SQLCOM_LOCK_TABLES:
				/* These statements keep the locking read */
				break;
			default:
				relaxed_read = true;
				break;
			}
		}

		if (relaxed_read) {
			prebuilt->select_lock_type = LOCK_NONE;
			prebuilt->stored_select_lock_type = LOCK_NONE;
		} else if (thd->lex->sql_command == SQLCOM_CHECKSUM) {
			/* Use consistent read for checksum table and
			convert lock type to the TL_READ */

			prebuilt->select_lock_type = LOCK_NONE;
			prebuilt->stored_select_lock_type = LOCK_NONE;
			lock.type = TL_READ;
		} else {
			prebuilt->select_lock_type = LOCK_S;
			prebuilt->stored_select_lock_type = LOCK_S;
		}
	} else if (lock_type != TL_IGNORE) {

		/* In ha_berkeley.cc there is a comment that MySQL
		may in exceptional cases call this with TL_IGNORE also
		when it is NOT going to release the lock. */

		/* We set possible LOCK_X value in external_lock, not yet
		here even if this would be SELECT ... FOR UPDATE */

		prebuilt->select_lock_type = LOCK_NONE;
		prebuilt->stored_select_lock_type = LOCK_NONE;
	}

	if (lock_type != TL_IGNORE && lock.type == TL_UNLOCK) {

		if (in_lock_tables) {
			/* We come here with TL_READ if MySQL is processing
			LOCK TABLES ... READ LOCAL. MyISAM under that table
			lock type reads the table as it was at the time the
			lock was granted (new inserts are allowed, but not
			seen by the reader). To get a similar effect on an
			InnoDB table, we must use LOCK TABLES ... READ. We
			convert the lock type here, so that for InnoDB, READ
			LOCAL is equivalent to READ. This will change the
			InnoDB behavior in mysqldump, so that dumps of InnoDB
			tables are consistent with dumps of MyISAM tables. */

			if (lock_type == TL_READ) {
				lock_type = TL_READ_NO_INSERT;
			}
		} else if (lock_type >= TL_WRITE_CONCURRENT_INSERT
			   && lock_type <= TL_WRITE
			   && !thd->tablespace_op
			   && thd->lex->sql_command != SQLCOM_CREATE_TABLE) {

			/* If we are not doing a LOCK TABLE or DISCARD/IMPORT
			TABLESPACE, then allow multiple writers */

			lock_type = TL_WRITE_ALLOW_WRITE;
		} else if (lock_type == TL_READ_NO_INSERT) {

			/* In queries of type INSERT INTO t1 SELECT ... FROM
			t2 ... MySQL would use the lock TL_READ_NO_INSERT on
			t2, and that would conflict with TL_WRITE_ALLOW_WRITE,
			blocking all inserts to t2. Convert the lock to a
			normal read lock to allow concurrent inserts to t2. */

			lock_type = TL_READ;
		}

		lock.type = lock_type;
	}

	/* Store the pointer to our lock and hand back the next free slot */
	*to = &lock;

	return(to + 1);
}

5515
/***********************************************************************
This function initializes the auto-inc counter if it has not been
initialized yet. This function does not change the value of the auto-inc
counter if it already has been initialized. The value of the auto-inc
counter is returned in the parameter ret. */

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5521 5522 5523 5524 5525 5526
int
ha_innobase::innobase_read_and_init_auto_inc(
/*=========================================*/
				/* out: 0 or error code: deadlock or
				lock wait timeout */
	longlong*	ret)	/* out: auto-inc value */
5527
{
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5528
  	row_prebuilt_t* prebuilt	= (row_prebuilt_t*) innobase_prebuilt;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5529
    	longlong        auto_inc;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5530
  	int     	error;
5531

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5532
  	ut_a(prebuilt);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5533 5534
	ut_a(prebuilt->trx ==
		(trx_t*) current_thd->transaction.all.innobase_tid);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5535 5536
	ut_a(prebuilt->table);
	
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5537 5538 5539 5540 5541
	/* In case MySQL calls this in the middle of a SELECT query, release
	possible adaptive hash latch to avoid deadlocks of threads */

	trx_search_latch_release_if_reserved(prebuilt->trx);

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5542
	auto_inc = dict_table_autoinc_read(prebuilt->table);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5543

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5544 5545 5546 5547 5548 5549
	if (auto_inc != 0) {
		/* Already initialized */
		*ret = auto_inc;
	
		return(0);
	}
5550

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5551
	error = row_lock_table_autoinc_for_mysql(prebuilt);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5552

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5553 5554
	if (error != DB_SUCCESS) {
		error = convert_error_code_to_mysql(error, user_thd);
5555

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5556 5557
		goto func_exit;
	}	
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
5558

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5559 5560
	/* Check again if someone has initialized the counter meanwhile */
	auto_inc = dict_table_autoinc_read(prebuilt->table);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5561

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5562 5563 5564 5565
	if (auto_inc != 0) {
		*ret = auto_inc;
	
		return(0);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5566
	}
5567

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5568 5569 5570 5571 5572 5573 5574 5575 5576
  	(void) extra(HA_EXTRA_KEYREAD);
  	index_init(table->next_number_index);

	/* We use an exclusive lock when we read the max key value from the
  	auto-increment column index. This is because then build_template will
  	advise InnoDB to fetch all columns. In SHOW TABLE STATUS the query
  	id of the auto-increment column is not changed, and previously InnoDB
  	did not fetch it, causing SHOW TABLE STATUS to show wrong values
  	for the autoinc column. */
5577

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5578
  	prebuilt->select_lock_type = LOCK_X;
5579

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5580 5581 5582
  	/* Play safe and also give in another way the hint to fetch
  	all columns in the key: */
  	
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5583
	prebuilt->hint_need_to_fetch_extra_cols = ROW_RETRIEVE_ALL_COLS;
5584

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5585
	prebuilt->trx->mysql_n_tables_locked += 1;
5586
  
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5587
	error = index_last(table->record[1]);
5588

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5589
  	if (error) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5590 5591 5592 5593 5594 5595 5596 5597 5598 5599 5600
		if (error == HA_ERR_END_OF_FILE) {
			/* The table was empty, initialize to 1 */
			auto_inc = 1;

			error = 0;
		} else {
			/* Deadlock or a lock wait timeout */
  			auto_inc = -1;

  			goto func_exit;
  		}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5601
  	} else {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5602 5603
		/* Initialize to max(col) + 1 */
    		auto_inc = (longlong) table->next_number_field->
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5604 5605
                        	val_int_offset(table->rec_buff_length) + 1;
  	}
5606

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5607 5608 5609
	dict_table_autoinc_initialize(prebuilt->table, auto_inc);

func_exit:
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5610
  	(void) extra(HA_EXTRA_NO_KEYREAD);
5611

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5612 5613 5614 5615 5616 5617 5618 5619 5620 5621 5622 5623 5624 5625 5626 5627 5628 5629 5630 5631 5632 5633 5634 5635 5636 5637 5638 5639
	index_end();

	*ret = auto_inc;

  	return(error);
}

/***********************************************************************
This function initializes the auto-inc counter if it has not been
initialized yet. This function does not change the value of the auto-inc
counter if it already has been initialized. Returns the value of the
auto-inc counter. */

longlong
ha_innobase::get_auto_increment()
/*=============================*/
                         /* out: auto-increment column value, -1 if error
                         (deadlock or lock wait timeout) */
{
	longlong	counter;

	/* Any nonzero error code from the read-and-init step is reported
	to the caller as -1 */

	if (innobase_read_and_init_auto_inc(&counter) != 0) {

		return(-1);
	}

	return(counter);
}

5644
/***********************************************************************
This function stores the binlog offset and flushes logs. */

void 
innobase_store_binlog_offset_and_flush_log(
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5649 5650 5651 5652
/*=======================================*/
    char 	*binlog_name,	/* in: binlog name */
    longlong 	offset)		/* in: binlog offset */
{
5653 5654 5655 5656 5657 5658 5659 5660
	mtr_t mtr;
	
	assert(binlog_name != NULL);

	/* Start a mini-transaction */
        mtr_start_noninline(&mtr); 

	/* Update the latest MySQL binlog name and offset info
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5661
        in trx sys header */
5662 5663 5664 5665 5666 5667 5668 5669 5670 5671 5672 5673

        trx_sys_update_mysql_binlog_offset(
            binlog_name,
            offset,
            TRX_SYS_MYSQL_LOG_INFO, &mtr);

        /* Commits the mini-transaction */
        mtr_commit(&mtr);
        
	/* Syncronous flush of the log buffer to disk */
	log_buffer_flush_to_disk();
}
guilhem@mysql.com's avatar
guilhem@mysql.com committed
5674

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5675 5676
char*
ha_innobase::get_mysql_bin_log_name()
guilhem@mysql.com's avatar
guilhem@mysql.com committed
5677
{
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5678
	return(trx_sys_mysql_bin_log_name);
guilhem@mysql.com's avatar
guilhem@mysql.com committed
5679 5680
}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5681 5682
/* Returns trx_sys_mysql_bin_log_pos, the MySQL binlog position kept by the
InnoDB trx sys module, as an unsigned 64-bit value. */
ulonglong
ha_innobase::get_mysql_bin_log_pos()
{
	/* trx_sys_mysql_bin_log_pos is ib_longlong, which is a typedef for
	a 64-bit integer (__int64 or longlong), so converting it to
	ulonglong is ok. */

	return((ulonglong) trx_sys_mysql_bin_log_pos);
}

5690
extern "C" {
5691
/**********************************************************************
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5692 5693 5694 5695 5696 5697 5698
This function is used to find the storage length in bytes of the first n
characters for prefix indexes using a multibyte character set. The function
finds charset information and returns length of prefix_len characters in the
index field in bytes.

NOTE: the prototype of this function is copied to data0type.c! If you change
this function, you MUST change also data0type.c! */
5699

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5700 5701 5702 5703 5704
ulint
innobase_get_at_most_n_mbchars(
/*===========================*/
				/* out: number of bytes occupied by the first
				n characters */
5705
	ulint charset_id,	/* in: character set id */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5706 5707 5708 5709 5710
	ulint prefix_len,	/* in: prefix length in bytes of the index
				(this has to be divided by mbmaxlen to get the
				number of CHARACTERS n in the prefix) */
	ulint data_len,         /* in: length of the string in bytes */
	const char* str)	/* in: character string */
5711
{
5712
	ulint char_length;	/* character length in bytes */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5713
	ulint n_chars;		/* number of characters in prefix */
5714
	CHARSET_INFO* charset;	/* charset used in the field */
5715

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5716
	charset = get_charset(charset_id, MYF(MY_WME));
5717

5718 5719
	ut_ad(charset);
	ut_ad(charset->mbmaxlen);
5720

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5721
	/* Calculate how many characters at most the prefix index contains */
5722

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5723
	n_chars = prefix_len / charset->mbmaxlen;
5724

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5725 5726 5727
	/* If the charset is multi-byte, then we must find the length of the
	first at most n chars in the string. If the string contains less
	characters than n, then we return the length to the end of the last
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5728
	character. */
5729

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5730 5731
	if (charset->mbmaxlen > 1) {
		/* my_charpos() returns the byte length of the first n_chars
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5732 5733 5734 5735 5736 5737 5738 5739 5740 5741 5742 5743 5744 5745 5746 5747
		characters, or a value bigger than the length of str, if
		there were not enough full characters in str.

		Why does the code below work:
		Suppose that we are looking for n UTF-8 characters.

		1) If the string is long enough, then the prefix contains at
		least n complete UTF-8 characters + maybe some extra
		characters + an incomplete UTF-8 character. No problem in
		this case. The function returns the pointer to the
		end of the nth character.

		2) If the string is not long enough, then the string contains
		the complete value of a column, that is, only complete UTF-8
		characters, and we can store in the column prefix index the
		whole string. */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5748

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5749 5750 5751 5752 5753
		char_length = my_charpos(charset, str,
						str + data_len, n_chars);
		if (char_length > data_len) {
			char_length = data_len;
		}		
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5754
	} else {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5755 5756 5757 5758 5759
		if (data_len < prefix_len) {
			char_length = data_len;
		} else {
			char_length = prefix_len;
		}
5760
	}
5761

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5762
	return(char_length);
5763 5764 5765
}
}

5766 5767
extern "C" {
/**********************************************************************
This function returns true if

1) SQL-query in the current thread
is either REPLACE or LOAD DATA INFILE REPLACE.

2) SQL-query in the current thread
is INSERT ON DUPLICATE KEY UPDATE.

NOTE that /mysql/innobase/row/row0ins.c must contain the
prototype for this function ! */

ibool
innobase_query_is_update(void)
/*==========================*/
{
	THD*	thd = (THD *)innobase_current_thd();

	switch (thd->lex->sql_command) {
	case SQLCOM_REPLACE:
	case SQLCOM_REPLACE_SELECT:
		return(1);

	case SQLCOM_LOAD:
		/* LOAD DATA INFILE counts only in its REPLACE form */
		return(thd->lex->duplicates == DUP_REPLACE ? 1 : 0);

	case SQLCOM_INSERT:
		/* INSERT counts only with ON DUPLICATE KEY UPDATE */
		return(thd->lex->duplicates == DUP_UPDATE ? 1 : 0);

	default:
		return(0);
	}
}
}

5803
#endif /* HAVE_INNOBASE_DB */