ha_innodb.cc 190 KB
Newer Older
unknown's avatar
unknown committed
1
/* Copyright (C) 2000-2005 MySQL AB & Innobase Oy
2

3 4 5 6
   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.
7

8 9 10 11
   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.
12

13 14 15 16
   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */

17
/* This file defines the InnoDB handler: the interface between MySQL and InnoDB
18 19
NOTE: You can only use noninlined InnoDB functions in this file, because we
have disabled the InnoDB inlining in this file. */
20

21 22
/* TODO list for the InnoDB handler in 5.0:
  - Remove the flag trx->active_trans and look at the InnoDB
23
    trx struct state field
unknown's avatar
unknown committed
24
  - fix savepoint functions to use savepoint storage area
unknown's avatar
unknown committed
25 26 27
  - Find out what kind of problems the OS X case-insensitivity causes to
    table and database names; should we 'normalize' the names like we do
    in Windows?
28
*/
unknown's avatar
unknown committed
29

30 31 32 33 34
#ifdef __GNUC__
#pragma implementation				// gcc: Class implementation
#endif

#include "mysql_priv.h"
unknown's avatar
unknown committed
35
#include "slave.h"
unknown's avatar
unknown committed
36

37 38 39 40
#ifdef HAVE_INNOBASE_DB
#include <m_ctype.h>
#include <hash.h>
#include <myisampack.h>
unknown's avatar
unknown committed
41
#include <mysys_err.h>
42
#include <my_sys.h>
43

44 45
#define MAX_ULONG_BIT ((ulong) 1 << (sizeof(ulong)*8-1))

46
#include "ha_innodb.h"
unknown's avatar
unknown committed
47

unknown's avatar
unknown committed
48 49
pthread_mutex_t innobase_share_mutex, // to protect innobase_open_files
                prepare_commit_mutex; // to force correct commit order in binlog
50
bool innodb_inited= 0;
unknown's avatar
unknown committed
51

52
/* Store MySQL definition of 'byte': in Linux it is char while InnoDB
53 54 55
uses unsigned char; the header univ.i which we include next defines
'byte' as a macro which expands to 'unsigned char' */

56
typedef byte	mysql_byte;
unknown's avatar
unknown committed
57

unknown's avatar
unknown committed
58 59
#define INSIDE_HA_INNOBASE_CC

60
/* Include necessary InnoDB headers */
61
extern "C" {
unknown's avatar
unknown committed
62
#include "../innobase/include/univ.i"
unknown's avatar
unknown committed
63
#include "../innobase/include/os0file.h"
unknown's avatar
unknown committed
64
#include "../innobase/include/os0thread.h"
unknown's avatar
unknown committed
65 66 67 68
#include "../innobase/include/srv0start.h"
#include "../innobase/include/srv0srv.h"
#include "../innobase/include/trx0roll.h"
#include "../innobase/include/trx0trx.h"
unknown's avatar
unknown committed
69
#include "../innobase/include/trx0sys.h"
70
#include "../innobase/include/mtr0mtr.h"
unknown's avatar
unknown committed
71 72 73 74 75
#include "../innobase/include/row0ins.h"
#include "../innobase/include/row0mysql.h"
#include "../innobase/include/row0sel.h"
#include "../innobase/include/row0upd.h"
#include "../innobase/include/log0log.h"
76
#include "../innobase/include/lock0lock.h"
unknown's avatar
unknown committed
77 78 79
#include "../innobase/include/dict0crea.h"
#include "../innobase/include/btr0cur.h"
#include "../innobase/include/btr0btr.h"
80
#include "../innobase/include/fsp0fsp.h"
81
#include "../innobase/include/sync0sync.h"
unknown's avatar
unknown committed
82
#include "../innobase/include/fil0fil.h"
83
#include "../innobase/include/trx0xa.h"
84 85 86 87 88
}

#define HA_INNOBASE_ROWS_IN_TABLE 10000 /* to get optimization right */
#define HA_INNOBASE_RANGE_COUNT	  100

89 90
uint 	innobase_init_flags 	= 0;
ulong 	innobase_cache_size 	= 0;
91
ulong 	innobase_large_page_size = 0;
92

unknown's avatar
unknown committed
93 94 95
/* The default values for the following, type long, start-up parameters
are declared in mysqld.cc: */

96
long innobase_mirrored_log_groups, innobase_log_files_in_group,
97
     innobase_log_file_size, innobase_log_buffer_size,
unknown's avatar
unknown committed
98 99
     innobase_buffer_pool_awe_mem_mb,
     innobase_buffer_pool_size, innobase_additional_mem_pool_size,
unknown's avatar
Merge  
unknown committed
100
     innobase_file_io_threads, innobase_lock_wait_timeout,
unknown's avatar
unknown committed
101 102
     innobase_thread_concurrency, innobase_force_recovery,
     innobase_open_files;
unknown's avatar
unknown committed
103

unknown's avatar
unknown committed
104 105
/* The default values for the following char* start-up parameters
are determined in innobase_init below: */
unknown's avatar
unknown committed
106
  
unknown's avatar
unknown committed
107
char*	innobase_data_home_dir			= NULL;
unknown's avatar
unknown committed
108
char*	innobase_data_file_path 		= NULL;
unknown's avatar
unknown committed
109
char*	innobase_log_group_home_dir		= NULL;
unknown's avatar
unknown committed
110
char*	innobase_log_arch_dir			= NULL;/* unused */
unknown's avatar
unknown committed
111 112
/* The following has a misleading name: starting from 4.0.5, this also
affects Windows: */
unknown's avatar
unknown committed
113 114 115 116 117
char*	innobase_unix_file_flush_method		= NULL;

/* Below we have boolean-valued start-up parameters, and their default
values */

unknown's avatar
unknown committed
118
uint	innobase_flush_log_at_trx_commit	= 1;
unknown's avatar
unknown committed
119
my_bool innobase_log_archive			= FALSE;/* unused */
120 121 122
my_bool innobase_use_doublewrite    = TRUE;
my_bool innobase_use_checksums      = TRUE;
my_bool innobase_use_large_pages    = FALSE;
unknown's avatar
unknown committed
123 124
my_bool	innobase_use_native_aio			= FALSE;
my_bool	innobase_fast_shutdown			= TRUE;
125 126 127
my_bool innobase_very_fast_shutdown		= FALSE; /* this can be set to
							 1 just prior calling
							 innobase_end() */
unknown's avatar
unknown committed
128
my_bool	innobase_file_per_table			= FALSE;
129
my_bool innobase_locks_unsafe_for_binlog        = FALSE;
130
my_bool innobase_create_status_file		= FALSE;
131

unknown's avatar
unknown committed
132
static char *internal_innobase_data_file_path	= NULL;
133

134
/* The following counter is used to convey information to InnoDB
135 136 137 138 139
about server activity: in selects it is not sensible to call
srv_active_wake_master_thread after each fetch or search, we only do
it every INNOBASE_WAKE_INTERVAL'th step. */

#define INNOBASE_WAKE_INTERVAL	32
140
ulong	innobase_active_counter	= 0;
141 142 143

char*	innobase_home 	= NULL;

unknown's avatar
unknown committed
144
static HASH 	innobase_open_tables;
145

146 147 148 149
#ifdef __NETWARE__  	/* some special cleanup for NetWare */
bool nw_panic = FALSE;
#endif

150
static mysql_byte* innobase_get_key(INNOBASE_SHARE *share,uint *length,
151 152 153
			      my_bool not_used __attribute__((unused)));
static INNOBASE_SHARE *get_share(const char *table_name);
static void free_share(INNOBASE_SHARE *share);
154 155 156 157 158
static int innobase_close_connection(THD* thd);
static int innobase_commit(THD* thd, bool all);
static int innobase_rollback(THD* thd, bool all);
static int innobase_rollback_to_savepoint(THD* thd, void *savepoint);
static int innobase_savepoint(THD* thd, void *savepoint);
unknown's avatar
unknown committed
159
static int innobase_release_savepoint(THD* thd, void *savepoint);
160 161

static handlerton innobase_hton = {
unknown's avatar
unknown committed
162
  "InnoDB",
163 164
  0,				/* slot */
  sizeof(trx_named_savept_t),	/* savepoint size. TODO: use it */
165 166 167
  innobase_close_connection,
  innobase_savepoint,
  innobase_rollback_to_savepoint,
unknown's avatar
unknown committed
168
  innobase_release_savepoint,
169 170 171 172 173
  innobase_commit,		/* commit */
  innobase_rollback,		/* rollback */
  innobase_xa_prepare,		/* prepare */
  innobase_xa_recover,		/* recover */
  innobase_commit_by_xid,	/* commit_by_xid */
unknown's avatar
unknown committed
174
  innobase_rollback_by_xid	/* rollback_by_xid */
175
};
176

177 178 179 180 181 182 183 184
/*********************************************************************
Commits a transaction in an InnoDB database. */

void
innobase_commit_low(
/*================*/
	trx_t*	trx);	/* in: transaction handle */

185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253
struct show_var_st innodb_status_variables[]= {
  {"buffer_pool_pages_data",
  (char*) &export_vars.innodb_buffer_pool_pages_data,     SHOW_LONG},
  {"buffer_pool_pages_dirty",
  (char*) &export_vars.innodb_buffer_pool_pages_dirty,    SHOW_LONG},
  {"buffer_pool_pages_flushed",
  (char*) &export_vars.innodb_buffer_pool_pages_flushed,  SHOW_LONG},
  {"buffer_pool_pages_free",
  (char*) &export_vars.innodb_buffer_pool_pages_free,     SHOW_LONG},
  {"buffer_pool_pages_latched",
  (char*) &export_vars.innodb_buffer_pool_pages_latched,  SHOW_LONG},
  {"buffer_pool_pages_misc",
  (char*) &export_vars.innodb_buffer_pool_pages_misc,     SHOW_LONG},
  {"buffer_pool_pages_total",
  (char*) &export_vars.innodb_buffer_pool_pages_total,    SHOW_LONG},
  {"buffer_pool_read_ahead_rnd",
  (char*) &export_vars.innodb_buffer_pool_read_ahead_rnd, SHOW_LONG},
  {"buffer_pool_read_ahead_seq",
  (char*) &export_vars.innodb_buffer_pool_read_ahead_seq, SHOW_LONG},
  {"buffer_pool_read_requests",
  (char*) &export_vars.innodb_buffer_pool_read_requests,  SHOW_LONG},
  {"buffer_pool_reads",
  (char*) &export_vars.innodb_buffer_pool_reads,          SHOW_LONG},
  {"buffer_pool_wait_free",
  (char*) &export_vars.innodb_buffer_pool_wait_free,      SHOW_LONG},
  {"buffer_pool_write_requests",
  (char*) &export_vars.innodb_buffer_pool_write_requests, SHOW_LONG},
  {"data_fsyncs",
  (char*) &export_vars.innodb_data_fsyncs,                SHOW_LONG},
  {"data_pending_fsyncs",
  (char*) &export_vars.innodb_data_pending_fsyncs,        SHOW_LONG},
  {"data_pending_reads",
  (char*) &export_vars.innodb_data_pending_reads,         SHOW_LONG},
  {"data_pending_writes",
  (char*) &export_vars.innodb_data_pending_writes,        SHOW_LONG},
  {"data_read",
  (char*) &export_vars.innodb_data_read,                  SHOW_LONG},
  {"data_reads",
  (char*) &export_vars.innodb_data_reads,                 SHOW_LONG},
  {"data_writes",
  (char*) &export_vars.innodb_data_writes,                SHOW_LONG},
  {"data_written",
  (char*) &export_vars.innodb_data_written,               SHOW_LONG},
  {"dblwr_pages_written",
  (char*) &export_vars.innodb_dblwr_pages_written,        SHOW_LONG},
  {"dblwr_writes",
  (char*) &export_vars.innodb_dblwr_writes,               SHOW_LONG},
  {"log_waits",
  (char*) &export_vars.innodb_log_waits,                  SHOW_LONG},
  {"log_write_requests",
  (char*) &export_vars.innodb_log_write_requests,         SHOW_LONG},
  {"log_writes",
  (char*) &export_vars.innodb_log_writes,                 SHOW_LONG},
  {"os_log_fsyncs",
  (char*) &export_vars.innodb_os_log_fsyncs,              SHOW_LONG},
  {"os_log_pending_fsyncs",
  (char*) &export_vars.innodb_os_log_pending_fsyncs,      SHOW_LONG},
  {"os_log_pending_writes",
  (char*) &export_vars.innodb_os_log_pending_writes,      SHOW_LONG},
  {"os_log_written",
  (char*) &export_vars.innodb_os_log_written,             SHOW_LONG},
  {"page_size",
  (char*) &export_vars.innodb_page_size,                  SHOW_LONG},
  {"pages_created",
  (char*) &export_vars.innodb_pages_created,              SHOW_LONG},
  {"pages_read",
  (char*) &export_vars.innodb_pages_read,                 SHOW_LONG},
  {"pages_written",
  (char*) &export_vars.innodb_pages_written,              SHOW_LONG},
unknown's avatar
unknown committed
254 255 256 257 258 259 260 261 262 263
  {"row_lock_waits",
  (char*) &export_vars.innodb_row_lock_waits,             SHOW_LONG},
  {"row_lock_current_waits",
  (char*) &export_vars.innodb_row_lock_current_waits,     SHOW_LONG},
  {"row_lock_time",
  (char*) &export_vars.innodb_row_lock_time,              SHOW_LONGLONG},
  {"row_lock_time_max",
  (char*) &export_vars.innodb_row_lock_time_max,          SHOW_LONG},
  {"row_lock_time_avg",
  (char*) &export_vars.innodb_row_lock_time_avg,          SHOW_LONG},
264 265 266 267 268 269 270 271 272 273
  {"rows_deleted",
  (char*) &export_vars.innodb_rows_deleted,               SHOW_LONG},
  {"rows_inserted",
  (char*) &export_vars.innodb_rows_inserted,              SHOW_LONG},
  {"rows_read",
  (char*) &export_vars.innodb_rows_read,                  SHOW_LONG},
  {"rows_updated",
  (char*) &export_vars.innodb_rows_updated,               SHOW_LONG},
  {NullS, NullS, SHOW_LONG}};

274 275
/* General functions */

unknown's avatar
unknown committed
276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309
/**********************************************************************
Calls srv_conc_enter_innodb() for the transaction, except when
srv_thread_concurrency is 500 or more: then the call is skipped altogether.
Doing the test here, in an inline function, saves some CPU. */
inline
void
innodb_srv_conc_enter_innodb(
/*=========================*/
	trx_t*	trx)	/* in: transaction handle */
{
	if (srv_thread_concurrency < 500) {

		srv_conc_enter_innodb(trx);
	}
}

/**********************************************************************
Calls srv_conc_exit_innodb() for the transaction, except when
srv_thread_concurrency is 500 or more: then the call is skipped altogether.
Doing the test here, in an inline function, saves some CPU. */
inline
void
innodb_srv_conc_exit_innodb(
/*========================*/
	trx_t*	trx)	/* in: transaction handle */
{
	if (srv_thread_concurrency < 500) {

		srv_conc_exit_innodb(trx);
	}
}

unknown's avatar
unknown committed
310
/**********************************************************************
unknown's avatar
unknown committed
311
Releases possible search latch and InnoDB thread FIFO ticket. These should
unknown's avatar
unknown committed
312 313 314 315
be released at each SQL statement end, and also when mysqld passes the
control to the client. It does no harm to release these also in the middle
of an SQL statement. */
inline
unknown's avatar
unknown committed
316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331
void
innobase_release_stat_resources(
/*============================*/
	trx_t*	trx)	/* in: transaction object */
{
	if (trx->has_search_latch) {
		trx_search_latch_release_if_reserved(trx);
	}

	if (trx->declared_to_be_inside_innodb) {
		/* Release our possible ticket in the FIFO */

		srv_conc_force_exit_innodb(trx);
	}
}

332 333 334 335 336 337 338 339
/************************************************************************
Call this function when mysqld passes control to the client. That is to
avoid deadlocks on the adaptive hash S-latch possibly held by thd. For more
documentation, see handler.cc. */

void
innobase_release_temporary_latches(
/*===============================*/
340
        THD *thd)
341
{
unknown's avatar
unknown committed
342 343
	trx_t*	trx;

unknown's avatar
unknown committed
344 345 346 347 348
	if (!innodb_inited) {
		
		return;
	}

unknown's avatar
unknown committed
349 350 351 352 353
	trx = (trx_t*) thd->ha_data[innobase_hton.slot];

	if (trx) {
        	innobase_release_stat_resources(trx);
	}
354 355
}

356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372
/************************************************************************
Bumps innobase_active_counter by one and, each time the counter reaches a
multiple of INNOBASE_WAKE_INTERVAL, calls srv_active_wake_master_thread().
Use this when a single database operation may introduce a small need for
server utility activity, like checkpointing. */
inline
void
innobase_active_small(void)
/*=======================*/
{
	if ((++innobase_active_counter % INNOBASE_WAKE_INTERVAL) == 0) {

		srv_active_wake_master_thread();
	}
}

373
/************************************************************************
unknown's avatar
unknown committed
374 375 376
Converts an InnoDB error code to a MySQL error code and also tells to MySQL
about a possible transaction rollback inside InnoDB caused by a lock wait
timeout or a deadlock. */
377 378 379 380 381
static
int
convert_error_code_to_mysql(
/*========================*/
			/* out: MySQL error code */
unknown's avatar
unknown committed
382 383
	int	error,	/* in: InnoDB error code */
	THD*	thd)	/* in: user thread handle or NULL */
384 385 386 387 388 389 390 391 392 393 394 395 396 397 398
{
	if (error == DB_SUCCESS) {

		return(0);

  	} else if (error == (int) DB_DUPLICATE_KEY) {

    		return(HA_ERR_FOUND_DUPP_KEY);

 	} else if (error == (int) DB_RECORD_NOT_FOUND) {

    		return(HA_ERR_NO_ACTIVE_RECORD);

 	} else if (error == (int) DB_ERROR) {

unknown's avatar
unknown committed
399
    		return(-1); /* unspecified error */
400 401

 	} else if (error == (int) DB_DEADLOCK) {
unknown's avatar
unknown committed
402
 		/* Since we rolled back the whole transaction, we must
unknown's avatar
unknown committed
403 404 405 406 407 408
 		tell it also to MySQL so that MySQL knows to empty the
 		cached binlog for this transaction */

 		if (thd) {
 			ha_rollback(thd);
 		}
409

410 411 412 413
    		return(HA_ERR_LOCK_DEADLOCK);

 	} else if (error == (int) DB_LOCK_WAIT_TIMEOUT) {

unknown's avatar
unknown committed
414 415 416
		/* Since we rolled back the whole transaction, we must
		tell it also to MySQL so that MySQL knows to empty the
		cached binlog for this transaction */
unknown's avatar
unknown committed
417

unknown's avatar
unknown committed
418 419 420
		if (thd) {
			ha_rollback(thd);
		}
unknown's avatar
unknown committed
421

unknown's avatar
unknown committed
422
   		return(HA_ERR_LOCK_WAIT_TIMEOUT);
423 424 425

 	} else if (error == (int) DB_NO_REFERENCED_ROW) {

unknown's avatar
Merge  
unknown committed
426
    		return(HA_ERR_NO_REFERENCED_ROW);
427 428 429

 	} else if (error == (int) DB_ROW_IS_REFERENCED) {

unknown's avatar
Merge  
unknown committed
430
    		return(HA_ERR_ROW_IS_REFERENCED);
431

432
        } else if (error == (int) DB_CANNOT_ADD_CONSTRAINT) {
433

unknown's avatar
Merge  
unknown committed
434
    		return(HA_ERR_CANNOT_ADD_FOREIGN);
435

unknown's avatar
unknown committed
436 437
        } else if (error == (int) DB_CANNOT_DROP_CONSTRAINT) {

unknown's avatar
unknown committed
438
    		return(HA_ERR_ROW_IS_REFERENCED); /* TODO: This is a bit
unknown's avatar
unknown committed
439 440
						misleading, a new MySQL error
						code should be introduced */
441 442
        } else if (error == (int) DB_COL_APPEARS_TWICE_IN_INDEX) {

unknown's avatar
unknown committed
443
    		return(HA_ERR_CRASHED);
444

445 446 447 448 449 450 451 452 453 454 455 456 457 458 459
 	} else if (error == (int) DB_OUT_OF_FILE_SPACE) {

    		return(HA_ERR_RECORD_FILE_FULL);

 	} else if (error == (int) DB_TABLE_IS_BEING_USED) {

    		return(HA_ERR_WRONG_COMMAND);

 	} else if (error == (int) DB_TABLE_NOT_FOUND) {

    		return(HA_ERR_KEY_NOT_FOUND);

  	} else if (error == (int) DB_TOO_BIG_RECORD) {

    		return(HA_ERR_TO_BIG_ROW);
unknown's avatar
unknown committed
460 461 462 463

  	} else if (error == (int) DB_CORRUPTION) {

    		return(HA_ERR_CRASHED);
unknown's avatar
unknown committed
464 465 466
  	} else if (error == (int) DB_NO_SAVEPOINT) {

    		return(HA_ERR_NO_SAVEPOINT);
unknown's avatar
unknown committed
467 468 469
  	} else if (error == (int) DB_LOCK_TABLE_FULL) {

    		return(HA_ERR_LOCK_TABLE_FULL);
470
    	} else {
unknown's avatar
unknown committed
471
    		return(-1);			// Unknown error
472 473 474
    	}
}

475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503
/*****************************************************************
Locks the MySQL mutex LOCK_thread_count.
If you want to print a thd that is not associated with the current thread,
you must call this function before reserving the InnoDB kernel_mutex, to
prevent MySQL from setting thd->query to NULL while you print. If you print
a thd of the current thread, we know that MySQL cannot modify thd->query,
and it is not necessary to call this. Call
innobase_mysql_end_print_arbitrary_thd() after you release the kernel_mutex.
NOTE that /mysql/innobase/lock/lock0lock.c must contain the prototype for this
function! */
extern "C"
void
innobase_mysql_prepare_print_arbitrary_thd(void)
/*============================================*/
{
	VOID(pthread_mutex_lock(&LOCK_thread_count));
}

/*****************************************************************
Releases the mutex (LOCK_thread_count) reserved by
innobase_mysql_prepare_print_arbitrary_thd().
NOTE that /mysql/innobase/lock/lock0lock.c must contain the prototype for this
function! */
extern "C"
void
innobase_mysql_end_print_arbitrary_thd(void)
/*========================================*/
{
	VOID(pthread_mutex_unlock(&LOCK_thread_count));
}

504 505
/*****************************************************************
Prints info of a THD object (== user session thread) to the
unknown's avatar
unknown committed
506
standard output. NOTE that /mysql/innobase/trx/trx0trx.c must contain
507
the prototype for this function! */
508
extern "C"
509 510 511
void
innobase_mysql_print_thd(
/*=====================*/
512
	FILE*   f,	/* in: output stream */
513
        void*   input_thd)/* in: pointer to a MySQL THD object */
514
{
515 516
	const THD*	thd;
	const char*	s;
unknown's avatar
unknown committed
517
	char		buf[301];
518

519
        thd = (const THD*) input_thd;
520

521 522 523 524 525 526
  	fprintf(f, "MySQL thread id %lu, query id %lu",
		thd->thread_id, thd->query_id);
	if (thd->host) {
		putc(' ', f);
		fputs(thd->host, f);
	}
527

528 529 530 531
	if (thd->ip) {
		putc(' ', f);
		fputs(thd->ip, f);
	}
532

533
  	if (thd->user) {
534 535
		putc(' ', f);
		fputs(thd->user, f);
536 537
  	}

538
	if ((s = thd->proc_info)) {
539
		putc(' ', f);
540
		fputs(s, f);
541
	}
542

543
	if ((s = thd->query)) {
unknown's avatar
unknown committed
544
		/* determine the length of the query string */
unknown's avatar
unknown committed
545 546 547 548 549
		uint32 i, len;
		
		len = thd->query_length;

		if (len > 300) {
550
			len = 300;	/* ADDITIONAL SAFETY: print at most
unknown's avatar
unknown committed
551
					300 chars to reduce the probability of
552
					a seg fault if there is a race in
unknown's avatar
unknown committed
553 554 555
					thd->query_length in MySQL; after
					May 14, 2004 probably no race any more,
					but better be safe */
unknown's avatar
unknown committed
556
		}
unknown's avatar
unknown committed
557

558 559 560
                /* Use strmake to reduce the timeframe
                   for a race, compared to fwrite() */
		i= (uint) (strmake(buf, s, len) - buf);
unknown's avatar
unknown committed
561
		putc('\n', f);
unknown's avatar
unknown committed
562
		fwrite(buf, 1, i, f);
563
	}
564

565
	putc('\n', f);
566 567
}

568
/**********************************************************************
569
Get the variable length bounds of the given character set.
570 571 572 573

NOTE that the exact prototype of this function has to be in
/innobase/data/data0type.ic! */
extern "C"
574
void
575 576
innobase_get_cset_width(
/*====================*/
577 578 579
	ulint	cset,		/* in: MySQL charset-collation code */
	ulint*	mbminlen,	/* out: minimum length of a char (in bytes) */
	ulint*	mbmaxlen)	/* out: maximum length of a char (in bytes) */
580 581 582
{
	CHARSET_INFO*	cs;
	ut_ad(cset < 256);
583 584
	ut_ad(mbminlen);
	ut_ad(mbmaxlen);
585 586

	cs = all_charsets[cset];
587 588 589 590 591 592 593
	if (cs) {
		*mbminlen = cs->mbminlen;
		*mbmaxlen = cs->mbmaxlen;
	} else {
		ut_a(cset == 0);
		*mbminlen = *mbmaxlen = 0;
	}
594 595
}

596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625
/**********************************************************************
Compares NUL-terminated UTF-8 strings case insensitively. This is a thin
wrapper around my_strcasecmp() using system_charset_info.

NOTE that the exact prototype of this function has to be in
/innobase/dict/dict0dict.c! */
extern "C"
int
innobase_strcasecmp(
/*================*/
				/* out: 0 if a=b, <0 if a<b, >0 if a>b */
	const char*	a,	/* in: first string to compare */
	const char*	b)	/* in: second string to compare */
{
	return(my_strcasecmp(system_charset_info, a, b));
}

/**********************************************************************
Makes all characters in a NUL-terminated UTF-8 string lower case. The
conversion is done in place by my_casedn_str() using system_charset_info.

NOTE that the exact prototype of this function has to be in
/innobase/dict/dict0dict.c! */
extern "C"
void
innobase_casedn_str(
/*================*/
	char*	a)	/* in/out: string to put in lower case */
{
	my_casedn_str(system_charset_info, a);
}

626 627 628 629 630 631 632 633 634
/*************************************************************************
Creates a temporary file. */
extern "C"
int
innobase_mysql_tmpfile(void)
/*========================*/
			/* out: temporary file descriptor, or < 0 on error */
{
	char	filename[FN_REFLEN];
unknown's avatar
unknown committed
635
	int	fd2 = -1;
636
	File	fd = create_temp_file(filename, mysql_tmpdir, "ib",
637 638 639 640 641 642 643
#ifdef __WIN__
				O_BINARY | O_TRUNC | O_SEQUENTIAL |
				O_TEMPORARY | O_SHORT_LIVED |
#endif /* __WIN__ */
				O_CREAT | O_EXCL | O_RDWR,
				MYF(MY_WME));
	if (fd >= 0) {
unknown's avatar
unknown committed
644 645 646 647
#ifndef __WIN__
		/* On Windows, open files cannot be removed, but files can be
		created with the O_TEMPORARY flag to the same effect
		("delete on close"). */
648 649
		unlink(filename);
#endif /* !__WIN__ */
unknown's avatar
unknown committed
650 651 652 653 654 655 656 657 658 659 660 661
		/* Copy the file descriptor, so that the additional resources
		allocated by create_temp_file() can be freed by invoking
		my_close().

		Because the file descriptor returned by this function
		will be passed to fdopen(), it will be closed by invoking
		fclose(), which in turn will invoke close() instead of
		my_close(). */
		fd2 = dup(fd);
		if (fd2 < 0) {
			DBUG_PRINT("error",("Got error %d on dup",fd2));
			my_errno=errno;
662 663 664 665
                        my_error(EE_OUT_OF_FILERESOURCES,
                                 MYF(ME_BELL+ME_WAITTANG),
                                 filename, my_errno);
                }
unknown's avatar
unknown committed
666 667 668
		my_close(fd, MYF(MY_WME));
	}
	return(fd2);
669 670
}

671
/*************************************************************************
672 673
Gets the InnoDB transaction handle for a MySQL handler object, creates
an InnoDB transaction struct if the corresponding MySQL thread struct still
674
lacks one. */
675
static
676 677 678
trx_t*
check_trx_exists(
/*=============*/
679
			/* out: InnoDB transaction handle */
680 681 682 683
	THD*	thd)	/* in: user thread handle */
{
	trx_t*	trx;

unknown's avatar
unknown committed
684
	ut_ad(thd == current_thd);
unknown's avatar
unknown committed
685

686
        trx = (trx_t*) thd->ha_data[innobase_hton.slot];
687 688

	if (trx == NULL) {
unknown's avatar
unknown committed
689
	        DBUG_ASSERT(thd != NULL);
690
		trx = trx_allocate_for_mysql();
691

692
		trx->mysql_thd = thd;
693 694
		trx->mysql_query_str = &(thd->query);
                trx->active_trans = 0;
695

696 697 698 699
		/* Update the info whether we should skip XA steps that eat
		CPU time */
		trx->support_xa = (ibool)(thd->variables.innodb_support_xa);

700
                thd->ha_data[innobase_hton.slot] = trx;
unknown's avatar
unknown committed
701
	} else {
unknown's avatar
unknown committed
702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718
		if (trx->magic_n != TRX_MAGIC_N) {
			mem_analyze_corruption((byte*)trx);

			ut_a(0);
		}
	}

	if (thd->options & OPTION_NO_FOREIGN_KEY_CHECKS) {
		trx->check_foreigns = FALSE;
	} else {
		trx->check_foreigns = TRUE;
	}

	if (thd->options & OPTION_RELAXED_UNIQUE_CHECKS) {
		trx->check_unique_secondary = FALSE;
	} else {
		trx->check_unique_secondary = TRUE;
719 720 721 722 723 724
	}

	return(trx);
}

/*************************************************************************
725
Updates the user_thd field in a handle and also allocates a new InnoDB
726 727
transaction handle if needed, and updates the transaction fields in the
prebuilt struct. */
728
inline
729 730 731 732 733 734
int
ha_innobase::update_thd(
/*====================*/
			/* out: 0 or error code */
	THD*	thd)	/* in: thd to use the handle */
{
735 736
	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;
	trx_t*		trx;
unknown's avatar
unknown committed
737

738 739
	trx = check_trx_exists(thd);

740
	if (prebuilt->trx != trx) {
741

742
		row_update_prebuilt_trx(prebuilt, trx);
743 744 745
	}

	user_thd = thd;
746

747 748 749
	return(0);
}

unknown's avatar
unknown committed
750 751 752 753 754 755 756
/*************************************************************************
Registers the InnoDB transaction in MySQL, to receive commit/rollback
events. This function must be called every time InnoDB starts a
transaction internally. */
static
void
register_trans(
unknown's avatar
unknown committed
757
/*===========*/
unknown's avatar
unknown committed
758
	THD*	thd)	/* in: thd to use the handle */
759
{
unknown's avatar
unknown committed
760
        /* Register the start of the statement */
unknown's avatar
unknown committed
761
        trans_register_ha(thd, FALSE, &innobase_hton);
unknown's avatar
unknown committed
762

unknown's avatar
unknown committed
763 764
        if (thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {

unknown's avatar
unknown committed
765
              /* No autocommit mode, register for a transaction */
unknown's avatar
unknown committed
766 767
              trans_register_ha(thd, TRUE, &innobase_hton);
        }
768
}
unknown's avatar
unknown committed
769 770 771 772 773 774 775 776 777 778

/*   BACKGROUND INFO: HOW THE MYSQL QUERY CACHE WORKS WITH INNODB
     ------------------------------------------------------------

1) The use of the query cache for TBL is disabled when there is an
uncommitted change to TBL.

2) When a change to TBL commits, InnoDB stores the current value of
its global trx id counter, let us denote it by INV_TRX_ID, to the table object
in the InnoDB data dictionary, and does only allow such transactions whose
unknown's avatar
unknown committed
779
id <= INV_TRX_ID to use the query cache.
unknown's avatar
unknown committed
780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822

3) When InnoDB does an INSERT/DELETE/UPDATE to a table TBL, or an implicit
modification because of an ON DELETE CASCADE, we invalidate the MySQL query cache
of TBL immediately.

How this is implemented inside InnoDB:

1) Since every modification always sets an IX type table lock on the InnoDB
table, it is easy to check if there can be uncommitted modifications for a
table: just check if there are locks in the lock list of the table.

2) When a transaction inside InnoDB commits, it reads the global trx id
counter and stores the value INV_TRX_ID to the tables on which it had a lock.

3) If there is an implicit table change from ON DELETE CASCADE or SET NULL,
InnoDB calls an invalidate method for the MySQL query cache for that table.

How this is implemented inside sql_cache.cc:

1) The query cache for an InnoDB table TBL is invalidated immediately at an
INSERT/UPDATE/DELETE, just like in the case of MyISAM. No need to delay
invalidation to the transaction commit.

2) To store or retrieve a value from the query cache of an InnoDB table TBL,
any query must first ask InnoDB's permission. We must pass the thd as a
parameter because InnoDB will look at the trx id, if any, associated with
that thd.

3) Use of the query cache for InnoDB tables is now allowed also when
AUTOCOMMIT==0 or we are inside BEGIN ... COMMIT. Thus transactions no longer
put restrictions on the use of the query cache.
*/

/**********************************************************************
The MySQL query cache uses this to check from InnoDB if the query cache at
the moment is allowed to operate on an InnoDB table. The SQL query must
be a non-locking SELECT.

The query cache is allowed to operate on certain query only if this function
returns TRUE for all tables in the query.

If thd is not in the autocommit state, this function also starts a new
transaction for thd if there is no active trx yet, and assigns a consistent
unknown's avatar
unknown committed
823 824 825 826 827 828 829 830
read view to it if there is no read view yet.

Why a deadlock of threads is not possible: the query cache calls this function
at the start of a SELECT processing. Then the calling thread cannot be
holding any InnoDB semaphores. The calling thread is holding the
query cache mutex, and this function will reserver the InnoDB kernel mutex.
Thus, the 'rank' in sync0sync.h of the MySQL query cache mutex is above
the InnoDB kernel mutex. */
unknown's avatar
unknown committed
831

unknown's avatar
unknown committed
832
my_bool
unknown's avatar
unknown committed
833 834 835 836 837 838 839 840 841 842 843 844
innobase_query_caching_of_table_permitted(
/*======================================*/
				/* out: TRUE if permitted, FALSE if not;
				note that the value FALSE does not mean
				we should invalidate the query cache:
				invalidation is called explicitly */
	THD*	thd,		/* in: thd of the user who is trying to
				store a result to the query cache or
				retrieve it */
	char*	full_name,	/* in: concatenation of database name,
				the null character '\0', and the table
				name */
unknown's avatar
unknown committed
845
	uint	full_name_len,	/* in: length of the full name, i.e.
unknown's avatar
unknown committed
846
				len(dbname) + len(tablename) + 1 */
unknown's avatar
unknown committed
847
        ulonglong *unused)      /* unused for this engine */
unknown's avatar
unknown committed
848 849 850 851 852 853 854 855 856
{
	ibool	is_autocommit;
	trx_t*	trx;
	char	norm_name[1000];

	ut_a(full_name_len < 999);

	if (thd->variables.tx_isolation == ISO_SERIALIZABLE) {
		/* In the SERIALIZABLE mode we add LOCK IN SHARE MODE to every
unknown's avatar
unknown committed
857
		plain SELECT if AUTOCOMMIT is not on. */
unknown's avatar
unknown committed
858 859 860 861
	
		return((my_bool)FALSE);
	}

unknown's avatar
unknown committed
862
        trx = check_trx_exists(thd);
unknown's avatar
unknown committed
863 864 865 866 867 868 869
	if (trx->has_search_latch) {
		ut_print_timestamp(stderr);
		fprintf(stderr,
"  InnoDB: Error: the calling thread is holding the adaptive search\n"
"InnoDB: latch though calling innobase_query_caching_of_table_permitted\n");
	}

unknown's avatar
unknown committed
870 871 872 873 874 875 876 877 878 879
	innobase_release_stat_resources(trx);

	if (!(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) {

		is_autocommit = TRUE;
	} else {
		is_autocommit = FALSE;

	}

unknown's avatar
unknown committed
880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897
	if (is_autocommit && trx->n_mysql_tables_in_use == 0) {
		/* We are going to retrieve the query result from the query
		cache. This cannot be a store operation to the query cache
		because then MySQL would have locks on tables already.

		TODO: if the user has used LOCK TABLES to lock the table,
		then we open a transaction in the call of row_.. below.
		That trx can stay open until UNLOCK TABLES. The same problem
		exists even if we do not use the query cache. MySQL should be
		modified so that it ALWAYS calls some cleanup function when
		the processing of a query ends!

		We can imagine we instantaneously serialize this consistent
		read trx to the current trx id counter. If trx2 would have
		changed the tables of a query result stored in the cache, and
		trx2 would have already committed, making the result obsolete,
		then trx2 would have already invalidated the cache. Thus we
		can trust the result in the cache is ok for this query. */
unknown's avatar
unknown committed
898 899 900 901 902 903 904 905 906 907 908 909

		return((my_bool)TRUE);
	}
	
	/* Normalize the table name to InnoDB format */

	memcpy(norm_name, full_name, full_name_len);

	norm_name[strlen(norm_name)] = '/'; /* InnoDB uses '/' as the
					    separator between db and table */
	norm_name[full_name_len] = '\0';
#ifdef __WIN__
910
	innobase_casedn_str(norm_name);
unknown's avatar
unknown committed
911
#endif
unknown's avatar
unknown committed
912 913 914
	/* The call of row_search_.. will start a new transaction if it is
	not yet started */

915 916 917 918 919
        if (trx->active_trans == 0) {

                register_trans(thd);
                trx->active_trans = 1;
        }
unknown's avatar
unknown committed
920

unknown's avatar
unknown committed
921 922
	if (row_search_check_if_query_cache_permitted(trx, norm_name)) {

unknown's avatar
unknown committed
923
		/* printf("Query cache for %s permitted\n", norm_name); */
unknown's avatar
unknown committed
924 925 926 927

		return((my_bool)TRUE);
	}

unknown's avatar
unknown committed
928
	/* printf("Query cache for %s NOT permitted\n", norm_name); */
unknown's avatar
unknown committed
929 930 931 932 933 934 935 936

	return((my_bool)FALSE);
}

/*********************************************************************
Invalidates the MySQL query cache for the table.
NOTE that the exact prototype of this function has to be in
/innobase/row/row0ins.c! */
937
extern "C"
unknown's avatar
unknown committed
938 939 940 941 942
void
innobase_invalidate_query_cache(
/*============================*/
	trx_t*	trx,		/* in: transaction which modifies the table */
	char*	full_name,	/* in: concatenation of database name, null
unknown's avatar
unknown committed
943 944 945 946 947
				char '\0', table name, null char'\0';
				NOTE that in Windows this is always
				in LOWER CASE! */
	ulint	full_name_len)	/* in: full name length where also the null
				chars count */
unknown's avatar
unknown committed
948
{
unknown's avatar
unknown committed
949 950 951 952
	/* Note that the sync0sync.h rank of the query cache mutex is just
	above the InnoDB kernel mutex. The caller of this function must not
	have latches of a lower rank. */

unknown's avatar
unknown committed
953
	/* Argument TRUE below means we are using transactions */
954
#ifdef HAVE_QUERY_CACHE
unknown's avatar
unknown committed
955 956 957 958
	query_cache.invalidate((THD*)(trx->mysql_thd),
					(const char*)full_name,
					(uint32)full_name_len,
					TRUE);
959
#endif
unknown's avatar
unknown committed
960
}
961 962

/*********************************************************************
963 964
Get the quote character to be used in SQL identifiers.
This definition must match the one in innobase/ut/ut0ut.c! */
965
extern "C"
966 967 968
int
mysql_get_identifier_quote_char(
/*============================*/
969
				/* out: quote character to be
970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989
				used in SQL identifiers; EOF if none */
	trx_t*		trx,	/* in: transaction */
	const char*	name,	/* in: name to print */
	ulint		namelen)/* in: length of name */
{
	if (!trx || !trx->mysql_thd) {
		return(EOF);
	}
	return(get_quote_char_for_identifier((THD*) trx->mysql_thd,
						name, namelen));
}

/**************************************************************************
Obtain a pointer to the MySQL THD object, as in current_thd().  This
definition must match the one in sql/ha_innodb.cc! */
extern "C"
void*
innobase_current_thd(void)
/*======================*/
			/* out: MySQL THD object */
990
{
991
	return(current_thd);
unknown's avatar
unknown committed
992 993
}

994 995 996 997
/*********************************************************************
Call this when you have opened a new table handle in HANDLER, before you
call index_read_idx() etc. Actually, we can let the cursor stay open even
over a transaction commit! Then you should call this before every operation,
998
fetch next etc. This function inits the necessary things even after a
999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018
transaction commit. */

void
ha_innobase::init_table_handle_for_HANDLER(void)
/*============================================*/
{
        row_prebuilt_t* prebuilt;

        /* If current thd does not yet have a trx struct, create one.
        If the current handle does not yet have a prebuilt struct, create
        one. Update the trx pointers in the prebuilt struct. Normally
        this operation is done in external_lock. */

        update_thd(current_thd);

        /* Initialize the prebuilt struct much like it would be inited in
        external_lock */

        prebuilt = (row_prebuilt_t*)innobase_prebuilt;

unknown's avatar
unknown committed
1019 1020
	innobase_release_stat_resources(prebuilt->trx);

1021 1022 1023 1024 1025 1026 1027 1028
        /* If the transaction is not started yet, start it */

        trx_start_if_not_started_noninline(prebuilt->trx);

        /* Assign a read view if the transaction does not have it yet */

        trx_assign_read_view(prebuilt->trx);

1029 1030
	/* Set the MySQL flag to mark that there is an active transaction */

1031 1032 1033 1034 1035 1036
        if (prebuilt->trx->active_trans == 0) {

                register_trans(current_thd);

                prebuilt->trx->active_trans = 1;
        }
1037

1038 1039 1040 1041 1042 1043 1044 1045 1046
        /* We did the necessary inits in this function, no need to repeat them
        in row_search_for_mysql */

        prebuilt->sql_stat_start = FALSE;

        /* We let HANDLER always to do the reads as consistent reads, even
        if the trx isolation level would have been specified as SERIALIZABLE */

        prebuilt->select_lock_type = LOCK_NONE;
unknown's avatar
unknown committed
1047
        prebuilt->stored_select_lock_type = LOCK_NONE;
1048 1049 1050

        /* Always fetch all columns in the index record */

unknown's avatar
unknown committed
1051
        prebuilt->hint_need_to_fetch_extra_cols = ROW_RETRIEVE_ALL_COLS;
1052 1053 1054 1055 1056

        /* We want always to fetch all columns in the whole row? Or do
	we???? */

        prebuilt->read_just_key = FALSE;
1057 1058

	prebuilt->used_in_HANDLER = TRUE;
1059 1060

	prebuilt->keep_other_fields_on_keyread = FALSE;
1061 1062
}

1063
/*************************************************************************
1064
Opens an InnoDB database. */
1065

unknown's avatar
unknown committed
1066
handlerton*
1067 1068
innobase_init(void)
/*===============*/
1069
			/* out: TRUE if error */
1070
{
unknown's avatar
unknown committed
1071
	static char	current_dir[3];		/* Set if using current lib */
1072 1073
	int		err;
	bool		ret;
1074
	char 	        *default_path;
unknown's avatar
merge  
unknown committed
1075

1076 1077
  	DBUG_ENTER("innobase_init");

unknown's avatar
unknown committed
1078 1079
	ut_a(DATA_MYSQL_TRUE_VARCHAR == (ulint)MYSQL_TYPE_VARCHAR);

unknown's avatar
unknown committed
1080
  	os_innodb_umask = (ulint)my_umask;
unknown's avatar
unknown committed
1081

unknown's avatar
unknown committed
1082 1083 1084 1085 1086 1087
	/* First calculate the default path for innodb_data_home_dir etc.,
	in case the user has not given any value.

	Note that when using the embedded server, the datadirectory is not
	necessarily the current directory of this program. */

1088
	if (mysqld_embedded) {
unknown's avatar
unknown committed
1089
		default_path = mysql_real_data_home;
unknown's avatar
unknown committed
1090
		fil_path_to_mysql_datadir = mysql_real_data_home;
unknown's avatar
unknown committed
1091 1092 1093 1094 1095 1096
	} else {
	  	/* It's better to use current lib, to keep paths short */
	  	current_dir[0] = FN_CURLIB;
	  	current_dir[1] = FN_LIBCHAR;
	  	current_dir[2] = 0;
	  	default_path = current_dir;
unknown's avatar
unknown committed
1097 1098
	}

unknown's avatar
unknown committed
1099 1100
	ut_a(default_path);

unknown's avatar
unknown committed
1101 1102 1103 1104 1105 1106
	if (specialflag & SPECIAL_NO_PRIOR) {
	        srv_set_thread_priorities = FALSE;
	} else {
	        srv_set_thread_priorities = TRUE;
	        srv_query_thread_priority = QUERY_PRIOR;
	}
unknown's avatar
unknown committed
1107

unknown's avatar
unknown committed
1108 1109
	/* Set InnoDB initialization parameters according to the values
	read from MySQL .cnf file */
unknown's avatar
unknown committed
1110

unknown's avatar
unknown committed
1111
	/*--------------- Data files -------------------------*/
1112

unknown's avatar
unknown committed
1113
	/* The default dir for data files is the datadir of MySQL */
unknown's avatar
unknown committed
1114 1115

	srv_data_home = (innobase_data_home_dir ? innobase_data_home_dir :
unknown's avatar
unknown committed
1116
			 default_path);
unknown's avatar
unknown committed
1117

unknown's avatar
unknown committed
1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133
	/* Set default InnoDB data file size to 10 MB and let it be
  	auto-extending. Thus users can use InnoDB in >= 4.0 without having
	to specify any startup options. */

	if (!innobase_data_file_path) {
  		innobase_data_file_path = (char*) "ibdata1:10M:autoextend";
	}

	/* Since InnoDB edits the argument in the next call, we make another
	copy of it: */

	internal_innobase_data_file_path = my_strdup(innobase_data_file_path,
						   MYF(MY_WME));

	ret = (bool) srv_parse_data_file_paths_and_sizes(
				internal_innobase_data_file_path,
unknown's avatar
unknown committed
1134 1135 1136 1137 1138 1139
				&srv_data_file_names,
				&srv_data_file_sizes,
				&srv_data_file_is_raw_partition,
				&srv_n_data_files,
				&srv_auto_extend_last_data_file,
				&srv_last_file_size_max);
1140
	if (ret == FALSE) {
unknown's avatar
unknown committed
1141 1142
	  	sql_print_error(
			"InnoDB: syntax error in innodb_data_file_path");
unknown's avatar
unknown committed
1143 1144
	  	my_free(internal_innobase_data_file_path,
						MYF(MY_ALLOW_ZERO_PTR));
1145
	  	DBUG_RETURN(0);
unknown's avatar
unknown committed
1146
	}
1147

unknown's avatar
unknown committed
1148 1149 1150 1151 1152 1153 1154
	/* -------------- Log files ---------------------------*/

	/* The default dir for log files is the datadir of MySQL */
	
	if (!innobase_log_group_home_dir) {
	  	innobase_log_group_home_dir = default_path;
	}
unknown's avatar
unknown committed
1155 1156

#ifdef UNIV_LOG_ARCHIVE	  	
unknown's avatar
unknown committed
1157 1158 1159 1160 1161 1162 1163
	/* Since innodb_log_arch_dir has no relevance under MySQL,
	starting from 4.0.6 we always set it the same as
	innodb_log_group_home_dir: */

	innobase_log_arch_dir = innobase_log_group_home_dir;

	srv_arch_dir = innobase_log_arch_dir;
unknown's avatar
unknown committed
1164
#endif /* UNIG_LOG_ARCHIVE */
unknown's avatar
unknown committed
1165

unknown's avatar
unknown committed
1166 1167 1168
	ret = (bool)
		srv_parse_log_group_home_dirs(innobase_log_group_home_dir,
						&srv_log_group_home_dirs);
unknown's avatar
unknown committed
1169

unknown's avatar
unknown committed
1170 1171 1172 1173
	if (ret == FALSE || innobase_mirrored_log_groups != 1) {
		fprintf(stderr,
		"InnoDB: syntax error in innodb_log_group_home_dir\n"
		"InnoDB: or a wrong number of mirrored log groups\n");
unknown's avatar
unknown committed
1174

unknown's avatar
unknown committed
1175 1176
	  	my_free(internal_innobase_data_file_path,
						MYF(MY_ALLOW_ZERO_PTR));
1177
		DBUG_RETURN(0);
unknown's avatar
unknown committed
1178
	}
unknown's avatar
unknown committed
1179

unknown's avatar
unknown committed
1180 1181 1182
	/* --------------------------------------------------*/

	srv_file_flush_method_str = innobase_unix_file_flush_method;
1183

unknown's avatar
unknown committed
1184
	srv_n_log_groups = (ulint) innobase_mirrored_log_groups;
1185
	srv_n_log_files = (ulint) innobase_log_files_in_group;
unknown's avatar
unknown committed
1186 1187
	srv_log_file_size = (ulint) innobase_log_file_size;

unknown's avatar
unknown committed
1188
#ifdef UNIV_LOG_ARCHIVE
unknown's avatar
unknown committed
1189
	srv_log_archive_on = (ulint) innobase_log_archive;
unknown's avatar
unknown committed
1190
#endif /* UNIV_LOG_ARCHIVE */
unknown's avatar
unknown committed
1191
	srv_log_buffer_size = (ulint) innobase_log_buffer_size;
unknown's avatar
unknown committed
1192
	srv_flush_log_at_trx_commit = (ulint) innobase_flush_log_at_trx_commit;
1193

unknown's avatar
unknown committed
1194 1195
        /* We set srv_pool_size here in units of 1 kB. InnoDB internally
        changes the value so that it becomes the number of database pages. */
unknown's avatar
unknown committed
1196 1197

        if (innobase_buffer_pool_awe_mem_mb == 0) {
unknown's avatar
unknown committed
1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212
                /* Careful here: we first convert the signed long int to ulint
                and only after that divide */
 
                srv_pool_size = ((ulint) innobase_buffer_pool_size) / 1024;
        } else {
                srv_use_awe = TRUE;
                srv_pool_size = (ulint)
                                (1024 * innobase_buffer_pool_awe_mem_mb);
                srv_awe_window_size = (ulint) innobase_buffer_pool_size;
 
                /* Note that what the user specified as
                innodb_buffer_pool_size is actually the AWE memory window
                size in this case, and the real buffer pool size is
                determined by .._awe_mem_mb. */
        }
unknown's avatar
unknown committed
1213

unknown's avatar
unknown committed
1214 1215 1216
	srv_mem_pool_size = (ulint) innobase_additional_mem_pool_size;

	srv_n_file_io_threads = (ulint) innobase_file_io_threads;
1217

1218
	srv_lock_wait_timeout = (ulint) innobase_lock_wait_timeout;
unknown's avatar
Merge  
unknown committed
1219 1220 1221
	srv_force_recovery = (ulint) innobase_force_recovery;

	srv_fast_shutdown = (ibool) innobase_fast_shutdown;
1222

unknown's avatar
unknown committed
1223 1224
	srv_use_doublewrite_buf = (ibool) innobase_use_doublewrite;
	srv_use_checksums = (ibool) innobase_use_checksums;
1225

unknown's avatar
unknown committed
1226 1227
	os_use_large_pages = (ibool) innobase_use_large_pages;
	os_large_page_size = (ulint) innobase_large_page_size;
1228
  
unknown's avatar
unknown committed
1229
	srv_file_per_table = (ibool) innobase_file_per_table;
1230
        srv_locks_unsafe_for_binlog = (ibool) innobase_locks_unsafe_for_binlog;
unknown's avatar
unknown committed
1231 1232

	srv_max_n_open_files = (ulint) innobase_open_files;
1233
	srv_innodb_status = (ibool) innobase_create_status_file;
unknown's avatar
unknown committed
1234

1235
	srv_print_verbose_log = mysqld_embedded ? 0 : 1;
unknown's avatar
unknown committed
1236

unknown's avatar
unknown committed
1237
	/* Store the default charset-collation number of this MySQL
1238
	installation */
unknown's avatar
unknown committed
1239

1240
	data_mysql_default_charset_coll = (ulint)default_charset_info->number;
unknown's avatar
unknown committed
1241

1242 1243 1244
	data_mysql_latin1_swedish_charset_coll =
					(ulint)my_charset_latin1.number;

unknown's avatar
unknown committed
1245 1246 1247 1248
	/* Store the latin1_swedish_ci character ordering table to InnoDB. For
	non-latin1_swedish_ci charsets we use the MySQL comparison functions,
	and consequently we do not need to know the ordering internally in
	InnoDB. */
unknown's avatar
unknown committed
1249

1250
	ut_a(0 == strcmp((char*)my_charset_latin1.name,
unknown's avatar
unknown committed
1251 1252
						(char*)"latin1_swedish_ci"));
	memcpy(srv_latin1_ordering, my_charset_latin1.sort_order, 256);
1253

1254 1255 1256 1257 1258 1259 1260 1261
	/* Since we in this module access directly the fields of a trx
        struct, and due to different headers and flags it might happen that
	mutex_t has a different size in this module and in InnoDB
	modules, we check at run time that the size is the same in
	these compilation modules. */

	srv_sizeof_trx_t_in_ha_innodb_cc = sizeof(trx_t);

unknown's avatar
unknown committed
1262
	err = innobase_start_or_create_for_mysql();
1263 1264

	if (err != DB_SUCCESS) {
unknown's avatar
unknown committed
1265 1266
	  	my_free(internal_innobase_data_file_path,
						MYF(MY_ALLOW_ZERO_PTR));
1267
		DBUG_RETURN(0);
1268
	}
unknown's avatar
unknown committed
1269 1270 1271

	(void) hash_init(&innobase_open_tables,system_charset_info, 32, 0, 0,
			 		(hash_get_key) innobase_get_key, 0, 0);
unknown's avatar
unknown committed
1272 1273
        pthread_mutex_init(&innobase_share_mutex, MY_MUTEX_INIT_FAST);
        pthread_mutex_init(&prepare_commit_mutex, MY_MUTEX_INIT_FAST);
1274
	innodb_inited= 1;
unknown's avatar
unknown committed
1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290

	/* If this is a replication slave and we needed to do a crash recovery,
	set the master binlog position to what InnoDB internally knew about
	how far we got transactions durable inside InnoDB. There is a
	problem here: if the user used also MyISAM tables, InnoDB might not
	know the right position for them.

	THIS DOES NOT WORK CURRENTLY because replication seems to initialize
	glob_mi also after innobase_init. */
	
/*	if (trx_sys_mysql_master_log_pos != -1) {
		ut_memcpy(glob_mi.log_file_name, trx_sys_mysql_master_log_name,
				1 + ut_strlen(trx_sys_mysql_master_log_name));
		glob_mi.pos = trx_sys_mysql_master_log_pos;
	}
*/
1291
	DBUG_RETURN(&innobase_hton);
1292 1293 1294
}

/***********************************************************************
1295
Closes an InnoDB database. */
1296

1297
bool
1298 1299
innobase_end(void)
/*==============*/
1300
				/* out: TRUE if error */
1301
{
1302
	int	err= 0;
1303 1304 1305

	DBUG_ENTER("innobase_end");

1306 1307 1308 1309 1310
#ifdef __NETWARE__ 	/* some special cleanup for NetWare */
	if (nw_panic) {
		set_panic_flag_for_netware();
	}
#endif
unknown's avatar
unknown committed
1311 1312 1313 1314
	if (innodb_inited) {
	  	if (innobase_very_fast_shutdown) {
	    		srv_very_fast_shutdown = TRUE;
	    		fprintf(stderr,
1315 1316 1317
"InnoDB: MySQL has requested a very fast shutdown without flushing\n"
"InnoDB: the InnoDB buffer pool to data files. At the next mysqld startup\n"
"InnoDB: InnoDB will do a crash recovery!\n");
unknown's avatar
unknown committed
1318
	  	}
1319

unknown's avatar
unknown committed
1320 1321 1322 1323 1324 1325 1326
	  	innodb_inited = 0;
	  	if (innobase_shutdown_for_mysql() != DB_SUCCESS) {
	    		err = 1;
		}
	  	hash_free(&innobase_open_tables);
	  	my_free(internal_innobase_data_file_path,
						MYF(MY_ALLOW_ZERO_PTR));
unknown's avatar
unknown committed
1327 1328
                pthread_mutex_destroy(&innobase_share_mutex);
                pthread_mutex_destroy(&prepare_commit_mutex);
1329
	}
1330

1331
  	DBUG_RETURN(err);
1332 1333 1334
}

/********************************************************************
unknown's avatar
unknown committed
1335 1336
Flushes InnoDB logs to disk and makes a checkpoint. Really, a commit flushes
the logs, and the name of this function should be innobase_checkpoint. */
1337

1338
bool
1339 1340
innobase_flush_logs(void)
/*=====================*/
1341
				/* out: TRUE if error */
1342
{
1343
  	bool 	result = 0;
1344 1345 1346

  	DBUG_ENTER("innobase_flush_logs");

unknown's avatar
unknown committed
1347
	log_buffer_flush_to_disk();
1348

1349 1350 1351 1352
  	DBUG_RETURN(result);
}

/*********************************************************************
1353
Commits a transaction in an InnoDB database. */
1354

unknown's avatar
unknown committed
1355 1356 1357 1358 1359
void
innobase_commit_low(
/*================*/
	trx_t*	trx)	/* in: transaction handle */
{
unknown's avatar
unknown committed
1360
        if (trx->conc_state == TRX_NOT_STARTED) {
unknown's avatar
unknown committed
1361

unknown's avatar
unknown committed
1362 1363
                return;
        }
unknown's avatar
unknown committed
1364

unknown's avatar
unknown committed
1365 1366 1367 1368
#ifdef HAVE_REPLICATION
        THD *thd=current_thd;

        if (thd && thd->slave_thread) {
1369
                /* Update the replication position info inside InnoDB */
unknown's avatar
unknown committed
1370

1371
                trx->mysql_master_log_file_name
1372
                                        = active_mi->rli.group_master_log_name;
unknown's avatar
unknown committed
1373 1374
                trx->mysql_master_log_pos = ((ib_longlong)
                                active_mi->rli.future_group_master_log_pos);
1375
        }
unknown's avatar
SCRUM  
unknown committed
1376
#endif /* HAVE_REPLICATION */
unknown's avatar
unknown committed
1377

unknown's avatar
unknown committed
1378
	trx_commit_for_mysql(trx);
unknown's avatar
unknown committed
1379 1380
}

1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417
/*********************************************************************
Creates an InnoDB transaction struct for the thd if it does not yet have one.
Starts a new InnoDB transaction if a transaction is not yet started. And
assigns a new snapshot for a consistent read if the transaction does not yet
have one. */

int
innobase_start_trx_and_assign_read_view(
/*====================================*/
			/* out: 0 */
	THD*	thd)	/* in: MySQL thread handle of the user for whom
			the transaction should be started */
{
	trx_t*	trx;

	DBUG_ENTER("innobase_start_trx_and_assign_read_view");

	/* Create a new trx struct for thd, if it does not yet have one */

	trx = check_trx_exists(thd);

	/* This is just to play safe: release a possible FIFO ticket and
	search latch. Since we will reserve the kernel mutex, we have to
	release the search system latch first to obey the latching order. */

	innobase_release_stat_resources(trx);

	/* If the transaction is not started yet, start it */

	trx_start_if_not_started_noninline(trx);

	/* Assign a read view if the transaction does not have it yet */

	trx_assign_read_view(trx);

	/* Set the MySQL flag to mark that there is an active transaction.
	The transaction is registered for the same thd whose trx struct was
	looked up above, not for whatever thread happens to be current. */

	if (trx->active_trans == 0) {

		register_trans(thd);

		trx->active_trans = 1;
	}

	DBUG_RETURN(0);
}

unknown's avatar
unknown committed
1428
/*********************************************************************
unknown's avatar
unknown committed
1429 1430
Commits a transaction in an InnoDB database or marks an SQL statement
ended. */
unknown's avatar
unknown committed
1431

1432
static int
1433 1434
innobase_commit(
/*============*/
unknown's avatar
unknown committed
1435
			/* out: 0 */
unknown's avatar
unknown committed
1436
	THD*	thd,	/* in: MySQL thread handle of the user for whom
1437
			the transaction should be committed */
1438 1439
        bool    all)    /* in: TRUE - commit transaction
                               FALSE - the current SQL statement ended */
1440
{
1441
	trx_t*		trx;
1442 1443 1444 1445

  	DBUG_ENTER("innobase_commit");
  	DBUG_PRINT("trans", ("ending transaction"));

unknown's avatar
unknown committed
1446
	trx = check_trx_exists(thd);
1447

1448 1449 1450
	/* Update the info whether we should skip XA steps that eat CPU time */
	trx->support_xa = (ibool)(thd->variables.innodb_support_xa);

unknown's avatar
unknown committed
1451 1452 1453
	/* Release a possible FIFO ticket and search latch. Since we will
	reserve the kernel mutex, we have to release the search system latch
	first to obey the latching order. */
unknown's avatar
unknown committed
1454

unknown's avatar
unknown committed
1455
	innobase_release_stat_resources(trx);
unknown's avatar
unknown committed
1456

1457
        /* The flag trx->active_trans is set to 1 in
unknown's avatar
unknown committed
1458 1459 1460

	1. ::external_lock(),
	2. ::start_stmt(),
1461
	3. innobase_query_caching_of_table_permitted(),
unknown's avatar
unknown committed
1462
	4. innobase_savepoint(),
1463
	5. ::init_table_handle_for_HANDLER(),
1464 1465
	6. innobase_start_trx_and_assign_read_view(),
	7. ::transactional_table_lock()
unknown's avatar
unknown committed
1466 1467 1468 1469 1470

	and it is only set to 0 in a commit or a rollback. If it is 0 we know
	there cannot be resources to be freed and we could return immediately.
	For the time being, we play safe and do the cleanup though there should
	be nothing to clean up. */
unknown's avatar
unknown committed
1471

1472
        if (trx->active_trans == 0
unknown's avatar
unknown committed
1473 1474 1475
	    && trx->conc_state != TRX_NOT_STARTED) {
	    
	        fprintf(stderr,
1476
"InnoDB: Error: trx->active_trans == 0\n"
unknown's avatar
unknown committed
1477
"InnoDB: but trx->conc_state != TRX_NOT_STARTED\n");
unknown's avatar
unknown committed
1478
	}
unknown's avatar
unknown committed
1479

unknown's avatar
unknown committed
1480 1481
        if (all
	    || (!(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)))) {
unknown's avatar
unknown committed
1482
	        
1483
 		/* We were instructed to commit the whole transaction, or
unknown's avatar
unknown committed
1484 1485
		this is an SQL statement end and autocommit is on */

unknown's avatar
unknown committed
1486
                /* We need current binlog position for ibbackup to work.
unknown's avatar
unknown committed
1487 1488 1489 1490 1491
                Note, the position is current because of prepare_commit_mutex */
                trx->mysql_log_file_name = mysql_bin_log.get_log_fname();
                trx->mysql_log_offset =
                        (ib_longlong)mysql_bin_log.get_log_file()->pos_in_file;

unknown's avatar
unknown committed
1492
		innobase_commit_low(trx);
unknown's avatar
unknown committed
1493

unknown's avatar
unknown committed
1494 1495 1496 1497
                if (trx->active_trans == 2) {

                        pthread_mutex_unlock(&prepare_commit_mutex);
                }
1498
                trx->active_trans = 0;
unknown's avatar
unknown committed
1499

unknown's avatar
unknown committed
1500
	} else {
unknown's avatar
unknown committed
1501 1502 1503
	        /* We just mark the SQL statement ended and do not do a
		transaction commit */

unknown's avatar
unknown committed
1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514
		if (trx->auto_inc_lock) {
			/* If we had reserved the auto-inc lock for some
			table in this SQL statement we release it now */
		  	
			row_unlock_table_autoinc_for_mysql(trx);
		}
		/* Store the current undo_no of the transaction so that we
		know where to roll back if we have to roll back the next
		SQL statement */

		trx_mark_sql_stat_end(trx);
unknown's avatar
unknown committed
1515
	}
1516

unknown's avatar
unknown committed
1517 1518
	/* Tell the InnoDB server that there might be work for utility
	threads: */
1519 1520 1521

	srv_active_wake_master_thread();

unknown's avatar
unknown committed
1522
	DBUG_RETURN(0);
1523 1524
}

unknown's avatar
unknown committed
1525 1526 1527
/* The following defined-out code will be enabled later when we put the
MySQL-4.1 functionality back to 5.0. This is needed to get InnoDB Hot Backup
to work. */
1528
#if 0
1529 1530 1531 1532
/*********************************************************************
This is called when MySQL writes the binlog entry for the current
transaction. Writes to the InnoDB tablespace info which tells where the
MySQL binlog entry for the current transaction ended. Also commits the
unknown's avatar
unknown committed
1533
transaction inside InnoDB but does NOT flush InnoDB log files to disk.
unknown's avatar
unknown committed
1534
To flush you have to call innobase_commit_complete(). We have separated
unknown's avatar
unknown committed
1535 1536
flushing to eliminate the bottleneck of LOCK_log in log.cc which disabled
InnoDB's group commit capability. */
1537 1538 1539 1540

int
innobase_report_binlog_offset_and_commit(
/*=====================================*/
unknown's avatar
unknown committed
1541
                                /* out: 0 */
1542
        THD*    thd,            /* in: user thread */
unknown's avatar
unknown committed
1543
        void*   trx_handle,     /* in: InnoDB trx handle */
1544 1545
        char*   log_file_name,  /* in: latest binlog file name */
        my_off_t end_offset)    /* in: the offset in the binlog file
unknown's avatar
unknown committed
1546
                                   up to which we wrote */
1547
{
unknown's avatar
unknown committed
1548 1549 1550
	trx_t*	trx;

	trx = (trx_t*)trx_handle;
1551

unknown's avatar
unknown committed
1552 1553
	ut_a(trx != NULL);

unknown's avatar
unknown committed
1554
	trx->mysql_log_file_name = log_file_name;
unknown's avatar
unknown committed
1555
	trx->mysql_log_offset = (ib_longlong)end_offset;
unknown's avatar
unknown committed
1556

unknown's avatar
unknown committed
1557 1558
	trx->flush_log_later = TRUE;

unknown's avatar
unknown committed
1559
	innobase_commit(thd, trx_handle);
unknown's avatar
unknown committed
1560 1561 1562 1563 1564 1565

	trx->flush_log_later = FALSE;

	return(0);
}

1566 1567 1568
/***********************************************************************
This function stores the binlog offset and flushes logs. */

unknown's avatar
unknown committed
1569
void
1570 1571 1572
innobase_store_binlog_offset_and_flush_log(
/*=======================================*/
    char *binlog_name,          /* in: binlog name */
unknown's avatar
unknown committed
1573
    longlong	offset)		/* in: binlog offset */
1574 1575
{
	mtr_t mtr;
unknown's avatar
unknown committed
1576

1577 1578 1579
	assert(binlog_name != NULL);

	/* Start a mini-transaction */
unknown's avatar
unknown committed
1580
        mtr_start_noninline(&mtr);
1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591

	/* Update the latest MySQL binlog name and offset info
        in trx sys header */

        trx_sys_update_mysql_binlog_offset(
            binlog_name,
            offset,
            TRX_SYS_MYSQL_LOG_INFO, &mtr);

        /* Commits the mini-transaction */
        mtr_commit(&mtr);
unknown's avatar
unknown committed
1592

1593 1594 1595 1596 1597 1598
	/* Syncronous flush of the log buffer to disk */
	log_buffer_flush_to_disk();
}

#endif

unknown's avatar
unknown committed
1599 1600 1601 1602 1603 1604 1605 1606
/*********************************************************************
This is called after MySQL has written the binlog entry for the current
transaction. Flushes the InnoDB log files to disk if required. */

int
innobase_commit_complete(
/*=====================*/
                                /* out: 0 */
1607
        THD*    thd)            /* in: user thread */
unknown's avatar
unknown committed
1608 1609 1610
{
	trx_t*	trx;

1611
        trx = (trx_t*) thd->ha_data[innobase_hton.slot];
unknown's avatar
unknown committed
1612

1613
        if (trx && trx->active_trans) {
unknown's avatar
unknown committed
1614

unknown's avatar
unknown committed
1615
                trx->active_trans = 0;
unknown's avatar
unknown committed
1616

unknown's avatar
unknown committed
1617
                if (srv_flush_log_at_trx_commit == 0) {
unknown's avatar
unknown committed
1618

unknown's avatar
unknown committed
1619 1620
                        return(0);
                }
unknown's avatar
unknown committed
1621

unknown's avatar
unknown committed
1622
                trx_commit_complete_for_mysql(trx);
1623
        }
unknown's avatar
unknown committed
1624 1625

	return(0);
1626 1627
}

1628
/*********************************************************************
unknown's avatar
unknown committed
1629
Rolls back a transaction or the latest SQL statement. */
1630

1631
static int
1632 1633 1634
innobase_rollback(
/*==============*/
			/* out: 0 or error number */
unknown's avatar
unknown committed
1635
	THD*	thd,	/* in: handle to the MySQL thread of the user
1636
			whose transaction should be rolled back */
1637 1638
        bool    all)    /* in: TRUE - commit transaction
                               FALSE - the current SQL statement ended */
1639 1640
{
	int	error = 0;
1641
	trx_t*	trx;
1642

1643 1644 1645
	DBUG_ENTER("innobase_rollback");
	DBUG_PRINT("trans", ("aborting transaction"));

1646
	trx = check_trx_exists(thd);
1647

1648 1649 1650
	/* Update the info whether we should skip XA steps that eat CPU time */
	trx->support_xa = (ibool)(thd->variables.innodb_support_xa);

unknown's avatar
unknown committed
1651 1652 1653 1654 1655 1656
	/* Release a possible FIFO ticket and search latch. Since we will
	reserve the kernel mutex, we have to release the search system latch
	first to obey the latching order. */

	innobase_release_stat_resources(trx);

unknown's avatar
unknown committed
1657
        if (trx->auto_inc_lock) {
unknown's avatar
unknown committed
1658 1659 1660 1661
		/* If we had reserved the auto-inc lock for some table (if
		we come here to roll back the latest SQL statement) we
		release it now before a possibly lengthy rollback */
		
unknown's avatar
unknown committed
1662 1663 1664
		row_unlock_table_autoinc_for_mysql(trx);
	}

unknown's avatar
unknown committed
1665 1666
        if (all
	    || (!(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)))) {
unknown's avatar
Merge  
unknown committed
1667

1668
		error = trx_rollback_for_mysql(trx);
1669
                trx->active_trans = 0;
unknown's avatar
unknown committed
1670
	} else {
1671
		error = trx_rollback_last_sql_stat_for_mysql(trx);
unknown's avatar
unknown committed
1672
	}
1673

unknown's avatar
unknown committed
1674 1675 1676
	DBUG_RETURN(convert_error_code_to_mysql(error, NULL));
}

1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709
/*********************************************************************
Rolls back the whole transaction given as an InnoDB trx object. */

int
innobase_rollback_trx(
/*==================*/
			/* out: 0 or error number */
	trx_t*	trx)	/* in: transaction */
{
	int	innodb_err;

	DBUG_ENTER("innobase_rollback_trx");
	DBUG_PRINT("trans", ("aborting transaction"));

	/* We will reserve the kernel mutex: to obey the latching order a
	possible FIFO ticket and the search system latch have to be
	released first. */

	innobase_release_stat_resources(trx);

	if (trx->auto_inc_lock) {
		/* An auto-inc lock reserved for some table is released
		before a possibly lengthy rollback */

		row_unlock_table_autoinc_for_mysql(trx);
	}

	innodb_err = trx_rollback_for_mysql(trx);

	DBUG_RETURN(convert_error_code_to_mysql(innodb_err, NULL));
}

unknown's avatar
unknown committed
1710 1711 1712
/*********************************************************************
Rolls back a transaction to a savepoint. */

1713
static int
unknown's avatar
unknown committed
1714 1715 1716 1717 1718 1719
innobase_rollback_to_savepoint(
/*===========================*/
				/* out: 0 if success, HA_ERR_NO_SAVEPOINT if
				no savepoint with the given name */
	THD*	thd,		/* in: handle to the MySQL thread of the user
				whose transaction should be rolled back */
1720
        void *savepoint)        /* in: savepoint data */
unknown's avatar
unknown committed
1721 1722 1723 1724
{
	ib_longlong mysql_binlog_cache_pos;
	int	    error = 0;
	trx_t*	    trx;
unknown's avatar
unknown committed
1725
        char 	    name[64];
unknown's avatar
unknown committed
1726 1727 1728 1729 1730

	DBUG_ENTER("innobase_rollback_to_savepoint");

	trx = check_trx_exists(thd);

unknown's avatar
unknown committed
1731 1732 1733
	/* Release a possible FIFO ticket and search latch. Since we will
	reserve the kernel mutex, we have to release the search system latch
	first to obey the latching order. */
unknown's avatar
unknown committed
1734 1735

	innobase_release_stat_resources(trx);
unknown's avatar
unknown committed
1736

1737
        /* TODO: use provided savepoint data area to store savepoint data */
unknown's avatar
unknown committed
1738 1739

        longlong2str((ulonglong)savepoint, name, 36);
1740

1741
        error = trx_rollback_to_savepoint_for_mysql(trx, name,
unknown's avatar
unknown committed
1742
						&mysql_binlog_cache_pos);
unknown's avatar
unknown committed
1743
	DBUG_RETURN(convert_error_code_to_mysql(error, NULL));
1744 1745
}

unknown's avatar
unknown committed
1746 1747
/*********************************************************************
Release transaction savepoint name. */
unknown's avatar
unknown committed
1748 1749
static
int
unknown's avatar
unknown committed
1750
innobase_release_savepoint(
unknown's avatar
unknown committed
1751
/*=======================*/
unknown's avatar
unknown committed
1752 1753 1754 1755
				/* out: 0 if success, HA_ERR_NO_SAVEPOINT if
				no savepoint with the given name */
	THD*	thd,		/* in: handle to the MySQL thread of the user
				whose transaction should be rolled back */
unknown's avatar
unknown committed
1756
        void*	savepoint)      /* in: savepoint data */
unknown's avatar
unknown committed
1757 1758 1759
{
	int	    error = 0;
	trx_t*	    trx;
unknown's avatar
unknown committed
1760
        char 	    name[64];
unknown's avatar
unknown committed
1761

unknown's avatar
unknown committed
1762
	DBUG_ENTER("innobase_release_savepoint");
unknown's avatar
unknown committed
1763 1764 1765

	trx = check_trx_exists(thd);

unknown's avatar
unknown committed
1766
        /* TODO: use provided savepoint data area to store savepoint data */
unknown's avatar
unknown committed
1767 1768

        longlong2str((ulonglong)savepoint, name, 36);
1769

unknown's avatar
unknown committed
1770
	error = trx_release_savepoint_for_mysql(trx, name);
unknown's avatar
unknown committed
1771 1772 1773 1774

	DBUG_RETURN(convert_error_code_to_mysql(error, NULL));
}

1775
/*********************************************************************
unknown's avatar
unknown committed
1776
Sets a transaction savepoint. */
unknown's avatar
unknown committed
1777 1778
static
int
unknown's avatar
unknown committed
1779 1780 1781 1782
innobase_savepoint(
/*===============*/
				/* out: always 0, that is, always succeeds */
	THD*	thd,		/* in: handle to the MySQL thread */
unknown's avatar
unknown committed
1783
        void*	savepoint)      /* in: savepoint data */
unknown's avatar
unknown committed
1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797
{
	int	error = 0;
	trx_t*	trx;

	DBUG_ENTER("innobase_savepoint");

	if (!(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) {
		/* In the autocommit state there is no sense to set a
		savepoint: we return immediate success */
	        DBUG_RETURN(0);
	}

	trx = check_trx_exists(thd);

unknown's avatar
unknown committed
1798 1799 1800 1801 1802 1803
	/* Release a possible FIFO ticket and search latch. Since we will
	reserve the kernel mutex, we have to release the search system latch
	first to obey the latching order. */

	innobase_release_stat_resources(trx);

1804 1805
        /* cannot happen outside of transaction */
        DBUG_ASSERT(trx->active_trans);
unknown's avatar
unknown committed
1806

1807
        /* TODO: use provided savepoint data area to store savepoint data */
1808
        char name[64];
unknown's avatar
unknown committed
1809
        longlong2str((ulonglong)savepoint,name,36);
1810

1811
        error = trx_savepoint_for_mysql(trx, name, (ib_longlong)0);
unknown's avatar
unknown committed
1812 1813 1814 1815

	DBUG_RETURN(convert_error_code_to_mysql(error, NULL));
}

1816
/*********************************************************************
unknown's avatar
unknown committed
1817
Frees a possible InnoDB trx object associated with the current THD. */
1818

1819
static int
1820 1821
innobase_close_connection(
/*======================*/
unknown's avatar
unknown committed
1822 1823
			/* out: 0 or error number */
	THD*	thd)	/* in: handle to the MySQL thread of the user
1824
			whose resources should be free'd */
1825
{
1826
        trx_free_for_mysql((trx_t*)thd->ha_data[innobase_hton.slot]);
unknown's avatar
unknown committed
1827
	return(0);
1828
}
1829 1830 1831


/*****************************************************************************
1832
** InnoDB database tables
1833 1834
*****************************************************************************/

1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854
/********************************************************************
Get the record format of the table of this handle. */
enum row_type
ha_innobase::get_row_type() const
/*=============================*/
			/* out: ROW_TYPE_REDUNDANT or ROW_TYPE_COMPACT;
			ROW_TYPE_NOT_USED if there is no prebuilt struct
			or no table in it */
{
	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;

	if (!prebuilt || !prebuilt->table) {
		/* Not expected to happen: caught by the debug assertion */
		ut_ad(0);

		return(ROW_TYPE_NOT_USED);
	}

	return(prebuilt->table->comp
		? ROW_TYPE_COMPACT
		: ROW_TYPE_REDUNDANT);
}

1855
/********************************************************************
unknown's avatar
unknown committed
1856
Gives the file extension of an InnoDB single-table tablespace. */
1857 1858 1859 1860

const char**
ha_innobase::bas_ext() const
/*========================*/
unknown's avatar
unknown committed
1861
				/* out: file extension string */
1862
{
unknown's avatar
unknown committed
1863
	static const char* ext[] = {".ibd", NullS};
1864

1865 1866 1867
	return(ext);
}

1868 1869 1870
/*********************************************************************
Normalizes a table name string. A normalized name consists of the
database name catenated to '/' and table name. An example:
unknown's avatar
unknown committed
1871 1872
test/mytable. On Windows normalization puts both the database name and the
table name always to lower case. */
1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886
static
void
normalize_table_name(
/*=================*/
	char*		norm_name,	/* out: normalized name as a
					null-terminated string */
	const char*	name)		/* in: table name string */
{
	char*	name_ptr;
	char*	db_ptr;
	char*	ptr;

	/* Scan name from the end */

unknown's avatar
unknown committed
1887
	ptr = strend(name)-1;
1888 1889 1890 1891 1892 1893 1894

	while (ptr >= name && *ptr != '\\' && *ptr != '/') {
		ptr--;
	}

	name_ptr = ptr + 1;

unknown's avatar
unknown committed
1895
	DBUG_ASSERT(ptr > name);
1896 1897

	ptr--;
1898

1899 1900 1901 1902 1903 1904 1905 1906 1907
	while (ptr >= name && *ptr != '\\' && *ptr != '/') {
		ptr--;
	}

	db_ptr = ptr + 1;

	memcpy(norm_name, db_ptr, strlen(name) + 1 - (db_ptr - name));

	norm_name[name_ptr - db_ptr - 1] = '/';
unknown's avatar
unknown committed
1908 1909

#ifdef __WIN__
unknown's avatar
unknown committed
1910
	innobase_casedn_str(norm_name);
unknown's avatar
unknown committed
1911
#endif
1912
}
1913

1914
/*********************************************************************
unknown's avatar
unknown committed
1915
Creates and opens a handle to a table which already exists in an InnoDB
1916 1917 1918 1919 1920 1921 1922 1923
database. */

int
ha_innobase::open(
/*==============*/
					/* out: 1 if error, 0 if success */
	const char*	name,		/* in: table name */
	int 		mode,		/* in: not used */
unknown's avatar
unknown committed
1924
	uint 		test_if_locked)	/* in: not used */
1925
{
1926 1927
	dict_table_t*	ib_table;
  	char		norm_name[1000];
1928
	THD*		thd;
1929 1930 1931 1932 1933 1934

	DBUG_ENTER("ha_innobase::open");

	UT_NOT_USED(mode);
	UT_NOT_USED(test_if_locked);

1935
	thd = current_thd;
1936 1937
	normalize_table_name(norm_name, name);

1938 1939
	user_thd = NULL;

unknown's avatar
unknown committed
1940 1941
	last_query_id = (ulong)-1;

unknown's avatar
unknown committed
1942 1943 1944 1945
	if (!(share=get_share(name))) {

		DBUG_RETURN(1);
	}
1946

1947 1948 1949 1950
	/* Create buffers for packing the fields of a record. Why
	table->reclength did not work here? Obviously, because char
	fields when packed actually became 1 byte longer, when we also
	stored the string length as the first byte. */
1951

unknown's avatar
unknown committed
1952 1953
	upd_and_key_val_buff_len =
				table->s->reclength + table->s->max_key_length
1954
							+ MAX_REF_PARTS * 3;
1955
	if (!(mysql_byte*) my_multi_malloc(MYF(MY_WME),
unknown's avatar
unknown committed
1956 1957
				     &upd_buff, upd_and_key_val_buff_len,
				     &key_val_buff, upd_and_key_val_buff_len,
1958
				     NullS)) {
1959
	  	free_share(share);
unknown's avatar
unknown committed
1960

1961
	  	DBUG_RETURN(1);
1962 1963
  	}

1964
	/* Get pointer to a table object in InnoDB dictionary cache */
1965

1966 1967 1968
	ib_table = dict_table_get_and_increment_handle_count(
				      		     norm_name, NULL);
 	if (NULL == ib_table) {
unknown's avatar
unknown committed
1969 1970 1971 1972 1973 1974 1975 1976 1977
	        ut_print_timestamp(stderr);
	        fprintf(stderr, "  InnoDB error:\n"
"Cannot find table %s from the internal data dictionary\n"
"of InnoDB though the .frm file for the table exists. Maybe you\n"
"have deleted and recreated InnoDB data files but have forgotten\n"
"to delete the corresponding .frm files of InnoDB tables, or you\n"
"have moved .frm files to another database?\n"
"Look from section 15.1 of http://www.innodb.com/ibman.html\n"
"how you can resolve the problem.\n",
1978
			  norm_name);
unknown's avatar
unknown committed
1979 1980 1981
	        free_share(share);
    		my_free((char*) upd_buff, MYF(0));
    		my_errno = ENOENT;
1982

1983
    		DBUG_RETURN(HA_ERR_NO_SUCH_TABLE);
unknown's avatar
unknown committed
1984 1985
  	}

1986
 	if (ib_table->ibd_file_missing && !thd->tablespace_op) {
unknown's avatar
unknown committed
1987 1988 1989 1990 1991 1992 1993 1994 1995
	        ut_print_timestamp(stderr);
	        fprintf(stderr, "  InnoDB error:\n"
"MySQL is trying to open a table handle but the .ibd file for\n"
"table %s does not exist.\n"
"Have you deleted the .ibd file from the database directory under\n"
"the MySQL datadir, or have you used DISCARD TABLESPACE?\n"
"Look from section 15.1 of http://www.innodb.com/ibman.html\n"
"how you can resolve the problem.\n",
			  norm_name);
1996
	        free_share(share);
1997
    		my_free((char*) upd_buff, MYF(0));
1998
    		my_errno = ENOENT;
unknown's avatar
unknown committed
1999

2000
    		DBUG_RETURN(HA_ERR_NO_SUCH_TABLE);
2001 2002
  	}

2003
	innobase_prebuilt = row_create_prebuilt(ib_table);
2004

unknown's avatar
unknown committed
2005 2006
	((row_prebuilt_t*)innobase_prebuilt)->mysql_row_len =
							table->s->reclength;
2007

unknown's avatar
unknown committed
2008 2009
	/* Looks like MySQL-3.23 sometimes has primary key number != 0 */

2010
 	primary_key = table->s->primary_key;
unknown's avatar
unknown committed
2011
	key_used_on_scan = primary_key;
2012

unknown's avatar
unknown committed
2013 2014 2015 2016 2017
	/* Allocate a buffer for a 'row reference'. A row reference is
	a string of bytes of length ref_length which uniquely specifies
        a row in our table. Note that MySQL may also compare two row
        references for equality by doing a simple memcmp on the strings
        of length ref_length! */
2018

unknown's avatar
unknown committed
2019
  	if (!row_table_got_default_clust_index(ib_table)) {
unknown's avatar
unknown committed
2020 2021 2022 2023 2024
	        if (primary_key >= MAX_KEY) {
	                fprintf(stderr,
		    "InnoDB: Error: table %s has a primary key in InnoDB\n"
		    "InnoDB: data dictionary, but not in MySQL!\n", name);
		}
2025 2026 2027

		((row_prebuilt_t*)innobase_prebuilt)
				->clust_index_was_generated = FALSE;
unknown's avatar
unknown committed
2028 2029 2030 2031 2032
 		/* MySQL allocates the buffer for ref. key_info->key_length
		includes space for all key columns + one byte for each column
		that may be NULL. ref_length must be as exact as possible to
		save space, because all row reference buffers are allocated
		based on ref_length. */
unknown's avatar
unknown committed
2033
 
unknown's avatar
unknown committed
2034
  		ref_length = table->key_info[primary_key].key_length;
2035
	} else {
unknown's avatar
unknown committed
2036 2037 2038
	        if (primary_key != MAX_KEY) {
	                fprintf(stderr,
		    "InnoDB: Error: table %s has no primary key in InnoDB\n"
unknown's avatar
unknown committed
2039 2040 2041 2042 2043 2044 2045 2046
		    "InnoDB: data dictionary, but has one in MySQL!\n"
		    "InnoDB: If you created the table with a MySQL\n"
                    "InnoDB: version < 3.23.54 and did not define a primary\n"
                    "InnoDB: key, but defined a unique key with all non-NULL\n"
                    "InnoDB: columns, then MySQL internally treats that key\n"
                    "InnoDB: as the primary key. You can fix this error by\n"
		    "InnoDB: dump + DROP + CREATE + reimport of the table.\n",
				name);
unknown's avatar
unknown committed
2047 2048
		}

2049 2050 2051
		((row_prebuilt_t*)innobase_prebuilt)
				->clust_index_was_generated = TRUE;

unknown's avatar
unknown committed
2052
  		ref_length = DATA_ROW_ID_LEN;
unknown's avatar
unknown committed
2053

unknown's avatar
unknown committed
2054 2055 2056 2057 2058 2059 2060
		/* If we automatically created the clustered index, then
		MySQL does not know about it, and MySQL must NOT be aware
		of the index used on scan, to make it avoid checking if we
		update the column of the index. That is why we assert below
		that key_used_on_scan is the undefined value MAX_KEY.
		The column is the row id in the automatical generation case,
		and it will never be updated anyway. */
unknown's avatar
unknown committed
2061 2062 2063 2064 2065
	       
		if (key_used_on_scan != MAX_KEY) {
	                fprintf(stderr,
"InnoDB: Warning: table %s key_used_on_scan is %lu even though there is no\n"
"InnoDB: primary key inside InnoDB.\n",
2066
				name, (ulong)key_used_on_scan);
unknown's avatar
unknown committed
2067
		}
2068
	}
2069

unknown's avatar
unknown committed
2070 2071 2072
	block_size = 16 * 1024;	/* Index block size in InnoDB: used by MySQL
				in query optimization */

unknown's avatar
Merge  
unknown committed
2073
	/* Init table lock structure */
2074
	thr_lock_data_init(&share->lock,&lock,(void*) 0);
2075 2076

  	info(HA_STATUS_NO_LOCK | HA_STATUS_VARIABLE | HA_STATUS_CONST);
2077

2078 2079 2080 2081
  	DBUG_RETURN(0);
}

/**********************************************************************
2082
Closes a handle to an InnoDB table. */
2083 2084 2085 2086 2087 2088 2089 2090 2091 2092

int
ha_innobase::close(void)
/*====================*/
				/* out: error number */
{
  	DBUG_ENTER("ha_innobase::close");

	row_prebuilt_free((row_prebuilt_t*) innobase_prebuilt);

2093
    	my_free((char*) upd_buff, MYF(0));
2094 2095
        free_share(share);

2096
	/* Tell InnoDB server that there might be work for
2097 2098 2099 2100 2101 2102 2103
	utility threads: */

	srv_active_wake_master_thread();

  	DBUG_RETURN(0);
}

unknown's avatar
unknown committed
2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176
/* The following accessor functions should really be inside MySQL code! */

/******************************************************************
Gets the offset of a field in a table, measured from the start of
table->record[0]. */
inline
uint
get_field_offset(
/*=============*/
			/* out: offset */
	TABLE*	table,	/* in: MySQL table object */
	Field*	field)	/* in: MySQL field object */
{
	char*	record_start = (char*) table->record[0];

	return((uint) (field->ptr - record_start));
}

/******************************************************************
Checks if a field in a record is SQL NULL. The position of the null bit
is taken from field->null_ptr relative to table->record[0], and the bit
is then tested at the same offset in 'record'. */
inline
uint
field_in_record_is_null(
/*====================*/
			/* out: 1 if NULL, 0 otherwise */
	TABLE*	table,	/* in: MySQL table object */
	Field*	field,	/* in: MySQL field object */
	char*	record)	/* in: a row in MySQL format */
{
	int	null_byte_pos;

	if (!field->null_ptr) {
		/* The field has no null bit: it can never be NULL */

		return(0);
	}

	null_byte_pos = (uint) ((char*) field->null_ptr
					- (char*) table->record[0]);

	return((record[null_byte_pos] & field->null_bit) ? 1 : 0);
}

/******************************************************************
Sets a field in a record to SQL NULL. The position of the null bit is
taken from field->null_ptr relative to table->record[0], and the bit is
then set at the same offset in 'record'. */
inline
void
set_field_in_record_to_null(
/*========================*/
	TABLE*	table,	/* in: MySQL table object */
	Field*	field,	/* in: MySQL field object */
	char*	record)	/* in: a row in MySQL format */
{
	int	null_byte_pos = (uint) ((char*) field->null_ptr
					- (char*) table->record[0]);

	record[null_byte_pos] |= field->null_bit;
}

/******************************************************************
Resets the SQL NULL bits in a record to zero by clearing the first
table->s->null_bytes bytes of the record. */
inline
void
reset_null_bits(
/*============*/
	TABLE*	table,	/* in: MySQL table object */
	char*	record)	/* in: a row in MySQL format */
{
	/* The bytes cleared are those at the very start of the record */
	bzero(record, table->s->null_bytes);
}

2180 2181
extern "C" {
/*****************************************************************
unknown's avatar
unknown committed
2182 2183 2184 2185
InnoDB uses this function to compare two data fields for which the data type
is such that we must use MySQL code to compare them. NOTE that the prototype
of this function is in rem0cmp.c in InnoDB source code! If you change this
function, remember to update the prototype there! */
2186 2187 2188

int
innobase_mysql_cmp(
2189
/*===============*/
2190 2191
					/* out: 1, 0, -1, if a is greater,
					equal, less than b, respectively */
2192
	int		mysql_type,	/* in: MySQL type */
unknown's avatar
unknown committed
2193
	uint		charset_number,	/* in: number of the charset */
2194 2195 2196 2197 2198 2199 2200
	unsigned char*	a,		/* in: data field */
	unsigned int	a_length,	/* in: data field length,
					not UNIV_SQL_NULL */
	unsigned char*	b,		/* in: data field */
	unsigned int	b_length)	/* in: data field length,
					not UNIV_SQL_NULL */
{
unknown's avatar
unknown committed
2201
	CHARSET_INFO*		charset;
2202
	enum_field_types	mysql_tp;
2203
	int                     ret;
2204

unknown's avatar
unknown committed
2205 2206
	DBUG_ASSERT(a_length != UNIV_SQL_NULL);
	DBUG_ASSERT(b_length != UNIV_SQL_NULL);
2207 2208 2209 2210 2211

	mysql_tp = (enum_field_types) mysql_type;

	switch (mysql_tp) {

2212
	case MYSQL_TYPE_STRING:
2213
	case MYSQL_TYPE_VAR_STRING:
unknown's avatar
unknown committed
2214 2215 2216 2217
	case FIELD_TYPE_TINY_BLOB:
	case FIELD_TYPE_MEDIUM_BLOB:
	case FIELD_TYPE_BLOB:
	case FIELD_TYPE_LONG_BLOB:
2218
        case MYSQL_TYPE_VARCHAR:
unknown's avatar
unknown committed
2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238
		/* Use the charset number to pick the right charset struct for
		the comparison. Since the MySQL function get_charset may be
		slow before Bar removes the mutex operation there, we first
		look at 2 common charsets directly. */

		if (charset_number == default_charset_info->number) {
			charset = default_charset_info;
		} else if (charset_number == my_charset_latin1.number) {
			charset = &my_charset_latin1;
		} else {
			charset = get_charset(charset_number, MYF(MY_WME));

			if (charset == NULL) {
				fprintf(stderr,
"InnoDB: fatal error: InnoDB needs charset %lu for doing a comparison,\n"
"InnoDB: but MySQL cannot find that charset.\n", (ulong)charset_number);
				ut_a(0);
			}
		}

unknown's avatar
unknown committed
2239 2240 2241 2242 2243 2244
                /* Starting from 4.1.3, we use strnncollsp() in comparisons of
                non-latin1_swedish_ci strings. NOTE that the collation order
                changes then: 'b\0\0...' is ordered BEFORE 'b  ...'. Users
                having indexes on such data need to rebuild their tables! */

                ret = charset->coll->strnncollsp(charset,
unknown's avatar
unknown committed
2245
                                  a, a_length,
2246
                                                 b, b_length, 0);
2247
		if (ret < 0) {
2248
		        return(-1);
2249
		} else if (ret > 0) {
2250
		        return(1);
2251
		} else {
2252
		        return(0);
2253
	        }
2254 2255 2256 2257 2258 2259 2260 2261 2262
	default:
		assert(0);
	}

	return(0);
}
}

/******************************************************************
unknown's avatar
unknown committed
2263 2264 2265
Converts a MySQL type to an InnoDB type. Note that this function returns
the 'mtype' of InnoDB. InnoDB differentiates between MySQL's old <= 4.1
VARCHAR and the new true VARCHAR in >= 5.0.3 by the 'prtype'. */
2266 2267
inline
ulint
2268 2269 2270
get_innobase_type_from_mysql_type(
/*==============================*/
			/* out: DATA_BINARY, DATA_VARCHAR, ... */
2271 2272
	Field*	field)	/* in: MySQL field */
{
unknown's avatar
unknown committed
2273 2274 2275
	/* The following asserts try to check that the MySQL type code fits in
	8 bits: this is used in ibuf and also when DATA_NOT_NULL is ORed to
	the type */
2276 2277

	switch (field->type()) {
unknown's avatar
unknown committed
2278 2279
	        /* NOTE that we only allow string types in DATA_MYSQL
		and DATA_VARMYSQL */
unknown's avatar
unknown committed
2280 2281 2282
                case MYSQL_TYPE_VAR_STRING: /* old <= 4.1 VARCHAR */
                case MYSQL_TYPE_VARCHAR:    /* new >= 5.0.3 true VARCHAR */
					if (field->binary()) {
2283 2284
						return(DATA_BINARY);
					} else if (strcmp(
unknown's avatar
unknown committed
2285 2286
						  field->charset()->name,
						 "latin1_swedish_ci") == 0) {
2287
						return(DATA_VARCHAR);
2288 2289
					} else {
						return(DATA_VARMYSQL);
2290
					}
2291
		case MYSQL_TYPE_STRING: if (field->binary()) {
2292 2293 2294

						return(DATA_FIXBINARY);
					} else if (strcmp(
unknown's avatar
unknown committed
2295 2296
						   field->charset()->name,
						   "latin1_swedish_ci") == 0) {
2297
						return(DATA_CHAR);
2298 2299
					} else {
						return(DATA_MYSQL);
2300
					}
unknown's avatar
unknown committed
2301 2302
                case FIELD_TYPE_NEWDECIMAL:
                                        return(DATA_BINARY);
2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313
		case FIELD_TYPE_LONG:
		case FIELD_TYPE_LONGLONG:
		case FIELD_TYPE_TINY:
		case FIELD_TYPE_SHORT:
		case FIELD_TYPE_INT24:
		case FIELD_TYPE_DATE:
		case FIELD_TYPE_DATETIME:
		case FIELD_TYPE_YEAR:
		case FIELD_TYPE_NEWDATE:
		case FIELD_TYPE_ENUM:
		case FIELD_TYPE_SET:
2314 2315 2316
		case FIELD_TYPE_TIME:
		case FIELD_TYPE_TIMESTAMP:
					return(DATA_INT);
2317
		case FIELD_TYPE_FLOAT:
2318
					return(DATA_FLOAT);
2319
		case FIELD_TYPE_DOUBLE:
2320
					return(DATA_DOUBLE);
2321
		case FIELD_TYPE_DECIMAL:
2322 2323 2324 2325 2326 2327
					return(DATA_DECIMAL);
		case FIELD_TYPE_TINY_BLOB:
		case FIELD_TYPE_MEDIUM_BLOB:
		case FIELD_TYPE_BLOB:
		case FIELD_TYPE_LONG_BLOB:
					return(DATA_BLOB);
2328 2329 2330 2331 2332 2333
		default:
					assert(0);
	}

	return(0);
}
2334

unknown's avatar
unknown committed
2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363
/***********************************************************************
Stores an unsigned integer smaller than 64k into two consecutive bytes,
least significant byte first (little-endian). The function asserts that
the value fits in 16 bits. */
inline
void
innobase_write_to_2_little_endian(
/*==============================*/
	byte*	buf,	/* in: where to store; 2 bytes are written */
	ulint	val)	/* in: value to write, must be < 64k */
{
	ulint	low_part;
	ulint	high_part;

	ut_a(val < 256 * 256);

	low_part = val % 256;
	high_part = val >> 8;

	/* Least significant byte goes first */
	buf[0] = (byte) low_part;
	buf[1] = (byte) high_part;
}

/***********************************************************************
Decodes an unsigned integer smaller than 64k from two consecutive bytes
stored least significant byte first (little-endian). */
inline
uint
innobase_read_from_2_little_endian(
/*===============================*/
			/* out: value */
	const mysql_byte*	buf)	/* in: from where to read; 2 bytes
					are read */
{
	ulint	low_part	= (ulint) buf[0];
	ulint	high_part	= (ulint) buf[1];

	return(low_part + 256 * high_part);
}

2364
/***********************************************************************
2365
Stores a key value for a row to a buffer. */
2366 2367 2368 2369 2370 2371 2372

uint
ha_innobase::store_key_val_for_row(
/*===============================*/
				/* out: key value length as stored in buff */
	uint 		keynr,	/* in: key number */
	char*		buff,	/* in/out: buffer for the key value (in MySQL
2373 2374
				format) */
	uint		buff_len,/* in: buffer length */
2375
	const mysql_byte* record)/* in: row in MySQL format */
2376 2377 2378 2379 2380
{
	KEY*		key_info 	= table->key_info + keynr;
  	KEY_PART_INFO*	key_part	= key_info->key_part;
  	KEY_PART_INFO*	end		= key_part + key_info->key_parts;
	char*		buff_start	= buff;
unknown's avatar
unknown committed
2381 2382 2383 2384 2385
	enum_field_types mysql_type;
	Field*		field;
	ulint		blob_len;
	byte*		blob_data;
	ibool		is_null;
2386

2387 2388
  	DBUG_ENTER("store_key_val_for_row");

unknown's avatar
unknown committed
2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402
	/* The format for storing a key field in MySQL is the following:

	1. If the column can be NULL, then in the first byte we put 1 if the
	field value is NULL, 0 otherwise.

	2. If the column is of a BLOB type (it must be a column prefix field
	in this case), then we put the length of the data in the field to the
	next 2 bytes, in the little-endian format. If the field is SQL NULL,
	then these 2 bytes are set to 0. Note that the length of data in the
	field is <= column prefix length.

	3. In a column prefix field, prefix_len next bytes are reserved for
	data. In a normal field the max field length next bytes are reserved
	for data. For a VARCHAR(n) the max field length is n. If the stored
unknown's avatar
unknown committed
2403
	value is the SQL NULL then these data bytes are set to 0.
unknown's avatar
unknown committed
2404

unknown's avatar
unknown committed
2405 2406 2407 2408 2409 2410
	4. We always use a 2 byte length for a true >= 5.0.3 VARCHAR. Note that
	in the MySQL row format, the length is stored in 1 or 2 bytes,
	depending on the maximum allowed length. But in the MySQL key value
	format, the length always takes 2 bytes.

	We have to zero-fill the buffer so that MySQL is able to use a
2411 2412
	simple memcmp to compare two key values to determine if they are
	equal. MySQL does this to compare contents of two 'ref' values. */
unknown's avatar
unknown committed
2413

2414
	bzero(buff, buff_len);
unknown's avatar
unknown committed
2415

2416
  	for (; key_part != end; key_part++) {
unknown's avatar
unknown committed
2417
	        is_null = FALSE;
2418 2419 2420 2421

    		if (key_part->null_bit) {
      			if (record[key_part->null_offset]
						& key_part->null_bit) {
unknown's avatar
unknown committed
2422 2423 2424 2425 2426 2427
				*buff = 1;
				is_null = TRUE;
      			} else {
				*buff = 0;
			}
			buff++;
2428
    		}
2429

unknown's avatar
unknown committed
2430 2431 2432
		field = key_part->field;
		mysql_type = field->type();

unknown's avatar
unknown committed
2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469
		if (mysql_type == MYSQL_TYPE_VARCHAR) {
						/* >= 5.0.3 true VARCHAR */
			ulint	lenlen;
			ulint	len;
			byte*	data;

			if (is_null) {
				buff += key_part->length + 2;
				
				continue;
			}

			lenlen = (ulint)
				(((Field_varstring*)field)->length_bytes);

			data = row_mysql_read_true_varchar(&len, 
				(byte*) (record
				+ (ulint)get_field_offset(table, field)),
				lenlen);
		
			/* The length in a key value is always stored in 2
			bytes */

			row_mysql_store_true_var_len((byte*)buff, len, 2);
			buff += 2;

			memcpy(buff, data, len);

			/* Note that we always reserve the maximum possible
			length of the true VARCHAR in the key value, though
			only len first bytes after the 2 length bytes contain
			actual data. The rest of the space was reset to zero
			in the bzero() call above. */

			buff += key_part->length;

		} else if (mysql_type == FIELD_TYPE_TINY_BLOB
unknown's avatar
unknown committed
2470 2471 2472
		    || mysql_type == FIELD_TYPE_MEDIUM_BLOB
		    || mysql_type == FIELD_TYPE_BLOB
		    || mysql_type == FIELD_TYPE_LONG_BLOB) {
2473

2474
			ut_a(key_part->key_part_flag & HA_PART_KEY_SEG);
unknown's avatar
unknown committed
2475 2476

		        if (is_null) {
unknown's avatar
unknown committed
2477
				buff += key_part->length + 2;
unknown's avatar
unknown committed
2478
				 
unknown's avatar
unknown committed
2479
				continue;
unknown's avatar
unknown committed
2480 2481 2482 2483
			}
		    
		        blob_data = row_mysql_read_blob_ref(&blob_len,
				(byte*) (record
unknown's avatar
unknown committed
2484
				+ (ulint)get_field_offset(table, field)),
unknown's avatar
unknown committed
2485 2486
					(ulint) field->pack_length());

unknown's avatar
unknown committed
2487 2488
			ut_a(get_field_offset(table, field)
						     == key_part->offset);
unknown's avatar
unknown committed
2489 2490 2491 2492 2493 2494 2495
			if (blob_len > key_part->length) {
			        blob_len = key_part->length;
			}

			/* MySQL reserves 2 bytes for the length and the
			storage of the number is little-endian */

unknown's avatar
unknown committed
2496 2497
			innobase_write_to_2_little_endian(
					(byte*)buff, (ulint)blob_len);
unknown's avatar
unknown committed
2498 2499 2500 2501
			buff += 2;

			memcpy(buff, blob_data, blob_len);

unknown's avatar
unknown committed
2502 2503 2504
			/* Note that we always reserve the maximum possible
			length of the BLOB prefix in the key value. */

unknown's avatar
unknown committed
2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515
			buff += key_part->length;
		} else {
		        if (is_null) {
				 buff += key_part->length;
				 
				 continue;
			}
			memcpy(buff, record + key_part->offset,
							key_part->length);
			buff += key_part->length;
		}
2516 2517
  	}

2518
	ut_a(buff <= buff_start + buff_len);
unknown's avatar
unknown committed
2519 2520

	DBUG_RETURN((uint)(buff - buff_start));
2521 2522 2523
}

/******************************************************************
unknown's avatar
unknown committed
2524 2525
Builds a 'template' to the prebuilt struct. The template is used in fast
retrieval of just those column values MySQL needs in its processing. */
unknown's avatar
unknown committed
2526
static
2527
void
2528 2529 2530 2531 2532 2533 2534 2535 2536
build_template(
/*===========*/
	row_prebuilt_t*	prebuilt,	/* in: prebuilt struct */
	THD*		thd,		/* in: current user thread, used
					only if templ_type is
					ROW_MYSQL_REC_FIELDS */
	TABLE*		table,		/* in: MySQL table */
	ulint		templ_type)	/* in: ROW_MYSQL_WHOLE_ROW or
					ROW_MYSQL_REC_FIELDS */
2537
{
2538 2539
	dict_index_t*	index;
	dict_index_t*	clust_index;
2540
	mysql_row_templ_t* templ;
2541
	Field*		field;
2542 2543
	ulint		n_fields;
	ulint		n_requested_fields	= 0;
unknown's avatar
Merge  
unknown committed
2544
	ibool		fetch_all_in_key	= FALSE;
2545
	ibool		fetch_primary_key_cols	= FALSE;
2546
	ulint		i;
2547

unknown's avatar
unknown committed
2548 2549 2550 2551
	if (prebuilt->select_lock_type == LOCK_X) {
		/* We always retrieve the whole clustered index record if we
		use exclusive row level locks, for example, if the read is
		done in an UPDATE statement. */
unknown's avatar
unknown committed
2552

unknown's avatar
unknown committed
2553 2554 2555
	        templ_type = ROW_MYSQL_WHOLE_ROW;
	}

unknown's avatar
unknown committed
2556 2557 2558
	if (templ_type == ROW_MYSQL_REC_FIELDS) {
	     if (prebuilt->hint_need_to_fetch_extra_cols
						== ROW_RETRIEVE_ALL_COLS) {
2559

unknown's avatar
unknown committed
2560 2561
		/* We know we must at least fetch all columns in the key, or
		all columns in the table */
unknown's avatar
unknown committed
2562

unknown's avatar
Merge  
unknown committed
2563
		if (prebuilt->read_just_key) {
unknown's avatar
unknown committed
2564
			/* MySQL has instructed us that it is enough to
2565 2566 2567 2568 2569
			fetch the columns in the key; looks like MySQL
			can set this flag also when there is only a
			prefix of the column in the key: in that case we
			retrieve the whole column from the clustered
			index */
unknown's avatar
unknown committed
2570

unknown's avatar
Merge  
unknown committed
2571 2572 2573 2574
			fetch_all_in_key = TRUE;
		} else {
			templ_type = ROW_MYSQL_WHOLE_ROW;
		}
unknown's avatar
unknown committed
2575 2576
	    } else if (prebuilt->hint_need_to_fetch_extra_cols
						== ROW_RETRIEVE_PRIMARY_KEY) {
unknown's avatar
unknown committed
2577 2578 2579 2580 2581
		/* We must at least fetch all primary key cols. Note that if
		the clustered index was internally generated by InnoDB on the
		row id (no primary key was defined), then
		row_search_for_mysql() will always retrieve the row id to a
		special buffer in the prebuilt struct. */
unknown's avatar
unknown committed
2582 2583 2584

		fetch_primary_key_cols = TRUE;
	    }
2585 2586
	}

unknown's avatar
unknown committed
2587
	clust_index = dict_table_get_first_index_noninline(prebuilt->table);
unknown's avatar
unknown committed
2588

2589
	if (templ_type == ROW_MYSQL_REC_FIELDS) {
unknown's avatar
unknown committed
2590
		index = prebuilt->index;
2591 2592
	} else {
		index = clust_index;
2593
	}
2594

2595 2596 2597 2598 2599 2600 2601
	if (index == clust_index) {
		prebuilt->need_to_access_clustered = TRUE;
	} else {
		prebuilt->need_to_access_clustered = FALSE;
		/* Below we check column by column if we need to access
		the clustered index */
	}
2602

2603
	n_fields = (ulint)table->s->fields; /* number of columns */
2604 2605 2606 2607 2608 2609

	if (!prebuilt->mysql_template) {
		prebuilt->mysql_template = (mysql_row_templ_t*)
						mem_alloc_noninline(
					n_fields * sizeof(mysql_row_templ_t));
	}
2610

2611
	prebuilt->template_type = templ_type;
2612
	prebuilt->null_bitmap_len = table->s->null_bytes;
2613

2614 2615
	prebuilt->templ_contains_blob = FALSE;

unknown's avatar
unknown committed
2616 2617
	/* Note that in InnoDB, i is the column number. MySQL calls columns
	'fields'. */
2618
	for (i = 0; i < n_fields; i++) {
2619
		templ = prebuilt->mysql_template + n_requested_fields;
2620 2621
		field = table->field[i];

2622 2623
                ibool index_contains_field=
                  dict_index_contains_col_or_prefix(index, i);
2624 2625 2626

		if (templ_type == ROW_MYSQL_REC_FIELDS && 
                    ((prebuilt->read_just_key && !index_contains_field) ||
2627 2628 2629 2630
		     (!(fetch_all_in_key && index_contains_field) &&
		      !(fetch_primary_key_cols &&
			dict_table_col_in_clustered_key(index->table, i)) &&
		      thd->query_id != field->query_id))) {
unknown's avatar
unknown committed
2631 2632

			/* This field is not needed in the query, skip it */
2633 2634 2635 2636 2637

			goto skip_field;
		}

		n_requested_fields++;
2638

2639
		templ->col_no = i;
2640

2641 2642 2643
		if (index == clust_index) {
			templ->rec_field_no = (index->table->cols + i)
								->clust_pos;
2644
		} else {
2645 2646
			templ->rec_field_no = dict_index_get_nth_col_pos(
								index, i);
2647 2648
		}

2649 2650 2651 2652 2653 2654 2655 2656
		if (templ->rec_field_no == ULINT_UNDEFINED) {
			prebuilt->need_to_access_clustered = TRUE;
		}

		if (field->null_ptr) {
			templ->mysql_null_byte_offset =
				(ulint) ((char*) field->null_ptr
					- (char*) table->record[0]);
2657

2658 2659 2660 2661
			templ->mysql_null_bit_mask = (ulint) field->null_bit;
		} else {
			templ->mysql_null_bit_mask = 0;
		}
2662

unknown's avatar
unknown committed
2663 2664 2665
		templ->mysql_col_offset = (ulint)
					get_field_offset(table, field);

2666 2667
		templ->mysql_col_len = (ulint) field->pack_length();
		templ->type = get_innobase_type_from_mysql_type(field);
unknown's avatar
unknown committed
2668 2669 2670 2671 2672 2673 2674
		templ->mysql_type = (ulint)field->type();

		if (templ->mysql_type == DATA_MYSQL_TRUE_VARCHAR) {
			templ->mysql_length_bytes = (ulint)
				    (((Field_varstring*)field)->length_bytes);
		}
	
2675 2676
		templ->charset = dtype_get_charset_coll_noninline(
				index->table->cols[i].type.prtype);
2677 2678
		templ->mbminlen = index->table->cols[i].type.mbminlen;
		templ->mbmaxlen = index->table->cols[i].type.mbmaxlen;
2679
		templ->is_unsigned = (ulint) (field->flags & UNSIGNED_FLAG);
2680

2681 2682
		if (templ->type == DATA_BLOB) {
			prebuilt->templ_contains_blob = TRUE;
2683
		}
2684 2685 2686
skip_field:
		;
	}
2687

2688
	prebuilt->n_template = n_requested_fields;
2689

unknown's avatar
unknown committed
2690
	if (index != clust_index && prebuilt->need_to_access_clustered) {
2691 2692 2693 2694
		/* Change rec_field_no's to correspond to the clustered index
		record */
		for (i = 0; i < n_requested_fields; i++) {
			templ = prebuilt->mysql_template + i;
2695

2696 2697 2698
			templ->rec_field_no =
			    (index->table->cols + templ->col_no)->clust_pos;
		}
2699
	}
2700 2701 2702
}

/************************************************************************
2703
Stores a row in an InnoDB database, to the table specified in this
2704 2705 2706 2707 2708
handle. */

int
ha_innobase::write_row(
/*===================*/
2709 2710
				/* out: error code */
	mysql_byte* 	record)	/* in: a row in MySQL format */
2711
{
2712
	row_prebuilt_t* prebuilt = (row_prebuilt_t*)innobase_prebuilt;
2713
  	int 		error;
2714
	longlong	auto_inc;
unknown's avatar
unknown committed
2715
	longlong	dummy;
unknown's avatar
unknown committed
2716
	ibool           auto_inc_used= FALSE;
unknown's avatar
unknown committed
2717

2718
  	DBUG_ENTER("ha_innobase::write_row");
2719

unknown's avatar
unknown committed
2720
	if (prebuilt->trx !=
2721
                        (trx_t*) current_thd->ha_data[innobase_hton.slot]) {
unknown's avatar
unknown committed
2722 2723
		fprintf(stderr,
"InnoDB: Error: the transaction object for the table handle is at\n"
2724 2725
"InnoDB: %p, but for the current thread it is at %p\n",
			prebuilt->trx,
2726
                        (trx_t*) current_thd->ha_data[innobase_hton.slot]);
2727 2728 2729 2730 2731 2732
		fputs("InnoDB: Dump of 200 bytes around prebuilt: ", stderr);
		ut_print_buf(stderr, ((const byte*)prebuilt) - 100, 200);
		fputs("\n"
			"InnoDB: Dump of 200 bytes around transaction.all: ",
			stderr);
		ut_print_buf(stderr,
unknown's avatar
unknown committed
2733 2734
           	 ((byte*)(&(current_thd->ha_data[innobase_hton.slot]))) - 100,
								200);
2735 2736
		putc('\n', stderr);
		ut_error;
unknown's avatar
unknown committed
2737
	}
unknown's avatar
unknown committed
2738

2739 2740
  	statistic_increment(current_thd->status_var.ha_write_count,
			    &LOCK_status);
2741

2742 2743
        if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_INSERT)
                table->timestamp_field->set_time();
2744

2745
	if ((user_thd->lex->sql_command == SQLCOM_ALTER_TABLE
unknown's avatar
unknown committed
2746
	    || user_thd->lex->sql_command == SQLCOM_OPTIMIZE
2747 2748
	    || user_thd->lex->sql_command == SQLCOM_CREATE_INDEX
	    || user_thd->lex->sql_command == SQLCOM_DROP_INDEX)
unknown's avatar
unknown committed
2749
	    && num_write_row >= 10000) {
2750 2751 2752 2753 2754 2755 2756 2757
		/* ALTER TABLE is COMMITted at every 10000 copied rows.
		The IX table lock for the original table has to be re-issued.
		As this method will be called on a temporary table where the
		contents of the original table is being copied to, it is
		a bit tricky to determine the source table.  The cursor
		position in the source table need not be adjusted after the
		intermediate COMMIT, since writes by other transactions are
		being blocked by a MySQL table lock TL_WRITE_ALLOW_READ. */
2758

2759
		dict_table_t*	src_table;
2760 2761
		ibool		mode;

2762
		num_write_row = 0;
2763

unknown's avatar
unknown committed
2764 2765
		/* Commit the transaction.  This will release the table
		locks, so they have to be acquired again. */
2766 2767 2768 2769 2770 2771

		/* Altering an InnoDB table */
		/* Get the source table. */
		src_table = lock_get_src_table(
				prebuilt->trx, prebuilt->table, &mode);
		if (!src_table) {
unknown's avatar
unknown committed
2772
no_commit:
2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785
			/* Unknown situation: do not commit */
			/*
			ut_print_timestamp(stderr);
			fprintf(stderr,
				"  InnoDB error: ALTER TABLE is holding lock"
				" on %lu tables!\n",
				prebuilt->trx->mysql_n_tables_locked);
			*/
			;
		} else if (src_table == prebuilt->table) {
			/* Source table is not in InnoDB format:
			no need to re-acquire locks on it. */

2786
			/* Altering to InnoDB format */
unknown's avatar
unknown committed
2787
                        innobase_commit(user_thd, 1);
2788
			/* Note that this transaction is still active. */
2789
			prebuilt->trx->active_trans = 1;
2790 2791
			/* We will need an IX lock on the destination table. */
		        prebuilt->sql_stat_start = TRUE;
2792 2793 2794
		} else {
			/* Ensure that there are no other table locks than
			LOCK_IX and LOCK_AUTO_INC on the destination table. */
unknown's avatar
unknown committed
2795

2796 2797
			if (!lock_is_table_exclusive(prebuilt->table,
							prebuilt->trx)) {
2798 2799 2800 2801 2802
				goto no_commit;
			}

			/* Commit the transaction.  This will release the table
			locks, so they have to be acquired again. */
unknown's avatar
unknown committed
2803
                        innobase_commit(user_thd, 1);
2804
			/* Note that this transaction is still active. */
2805
			prebuilt->trx->active_trans = 1;
2806
			/* Re-acquire the table lock on the source table. */
2807
			row_lock_table_for_mysql(prebuilt, src_table, mode);
2808 2809 2810
			/* We will need an IX lock on the destination table. */
		        prebuilt->sql_stat_start = TRUE;
		}
2811 2812
	}

unknown's avatar
unknown committed
2813 2814
	num_write_row++;

unknown's avatar
unknown committed
2815 2816 2817
	if (last_query_id != user_thd->query_id) {
	        prebuilt->sql_stat_start = TRUE;
                last_query_id = user_thd->query_id;
unknown's avatar
unknown committed
2818 2819

		innobase_release_stat_resources(prebuilt->trx);
unknown's avatar
unknown committed
2820 2821
	}

2822
  	if (table->next_number_field && record == table->record[0]) {
unknown's avatar
unknown committed
2823 2824
		/* This is the case where the table has an
		auto-increment column */
unknown's avatar
unknown committed
2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849

		/* Initialize the auto-inc counter if it has not been
		initialized yet */

		if (0 == dict_table_autoinc_peek(prebuilt->table)) {

			/* This call initializes the counter */
		        error = innobase_read_and_init_auto_inc(&dummy);

			if (error) {
				/* Deadlock or lock wait timeout */

				goto func_exit;
			}

			/* We have to set sql_stat_start to TRUE because
			the above call probably has called a select, and
			has reset that flag; row_insert_for_mysql has to
			know to set the IX intention lock on the table,
			something it only does at the start of each
			statement */

			prebuilt->sql_stat_start = TRUE;
		}

2850 2851 2852 2853 2854 2855
		/*
                  We must use the handler code to update the auto-increment
                  value to be sure that increment it correctly.
                */
    		update_auto_increment();
                auto_inc_used= 1;
unknown's avatar
unknown committed
2856

2857
	}
2858

2859 2860 2861 2862
	if (prebuilt->mysql_template == NULL
			|| prebuilt->template_type != ROW_MYSQL_WHOLE_ROW) {
		/* Build the template used in converting quickly between
		the two database formats */
2863

2864 2865
		build_template(prebuilt, NULL, table, ROW_MYSQL_WHOLE_ROW);
	}
2866

2867
	innodb_srv_conc_enter_innodb(prebuilt->trx);
unknown's avatar
Merge  
unknown committed
2868

2869
	error = row_insert_for_mysql((byte*) record, prebuilt);
unknown's avatar
Merge  
unknown committed
2870

2871
	if (error == DB_SUCCESS && auto_inc_used) {
unknown's avatar
Merge  
unknown committed
2872

unknown's avatar
unknown committed
2873
        	/* Fetch the value that was set in the autoincrement field */
unknown's avatar
unknown committed
2874

unknown's avatar
unknown committed
2875
          	auto_inc = table->next_number_field->val_int();
2876

unknown's avatar
unknown committed
2877
          	if (auto_inc != 0) {
unknown's avatar
unknown committed
2878 2879 2880
			/* This call will calculate the max of the current
			value and the value supplied by the user and
			update the counter accordingly */
2881 2882 2883 2884 2885 2886 2887 2888

			/* We have to use the transactional lock mechanism
			on the auto-inc counter of the table to ensure
			that replication and roll-forward of the binlog
			exactly imitates also the given auto-inc values.
			The lock is released at each SQL statement's
			end. */

unknown's avatar
unknown committed
2889
            		error = row_lock_table_autoinc_for_mysql(prebuilt);
2890

unknown's avatar
unknown committed
2891 2892 2893 2894 2895 2896 2897
            		if (error != DB_SUCCESS) {
              			error = convert_error_code_to_mysql(error,
								user_thd);
              			goto func_exit;
            		}
            		dict_table_autoinc_update(prebuilt->table, auto_inc);
          	}
2898
        }
2899

unknown's avatar
unknown committed
2900
	innodb_srv_conc_exit_innodb(prebuilt->trx);
unknown's avatar
Merge  
unknown committed
2901

unknown's avatar
unknown committed
2902
	error = convert_error_code_to_mysql(error, user_thd);
2903

2904
	/* Tell InnoDB server that there might be work for
2905
	utility threads: */
2906
func_exit:
2907
	innobase_active_small();
2908 2909 2910 2911

  	DBUG_RETURN(error);
}

2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922
/**************************************************************************
Checks which fields have changed in a row and stores information
of them to an update vector. */
static
int
calc_row_difference(
/*================*/
					/* out: error number or 0 */
	upd_t*		uvect,		/* in/out: update vector */
	mysql_byte* 	old_row,	/* in: old row in MySQL format */
	mysql_byte* 	new_row,	/* in: new row in MySQL format */
unknown's avatar
unknown committed
2923 2924
	struct st_table* table,		/* in: table in MySQL data
					dictionary */
2925
	mysql_byte*	upd_buff,	/* in: buffer to use */
unknown's avatar
unknown committed
2926
	ulint		buff_len,	/* in: buffer length */
2927
	row_prebuilt_t*	prebuilt,	/* in: InnoDB prebuilt struct */
2928 2929
	THD*		thd)		/* in: user thread */
{
unknown's avatar
unknown committed
2930
	mysql_byte*	original_upd_buff = upd_buff;
2931
	Field*		field;
unknown's avatar
unknown committed
2932
	enum_field_types field_mysql_type;
2933 2934 2935
	uint		n_fields;
	ulint		o_len;
	ulint		n_len;
unknown's avatar
unknown committed
2936
	ulint		col_pack_len;
unknown's avatar
unknown committed
2937
	byte*		new_mysql_row_col;
unknown's avatar
unknown committed
2938 2939 2940
	byte*	        o_ptr;
        byte*	        n_ptr;
        byte*	        buf;
2941
	upd_field_t*	ufield;
2942 2943
	ulint		col_type;
	ulint		is_unsigned;
2944
	ulint		n_changed = 0;
unknown's avatar
unknown committed
2945
	dfield_t	dfield;
2946
	uint		i;
2947

2948
	n_fields = table->s->fields;
2949

2950
	/* We use upd_buff to convert changed fields */
unknown's avatar
unknown committed
2951
	buf = (byte*) upd_buff;
2952

2953 2954 2955
	for (i = 0; i < n_fields; i++) {
		field = table->field[i];

2956
		/* if (thd->query_id != field->query_id) { */
2957 2958
			/* TODO: check that these fields cannot have
			changed! */
2959

2960 2961
		/*	goto skip_field;
		}*/
2962

unknown's avatar
unknown committed
2963 2964
		o_ptr = (byte*) old_row + get_field_offset(table, field);
		n_ptr = (byte*) new_row + get_field_offset(table, field);
unknown's avatar
unknown committed
2965
		
unknown's avatar
unknown committed
2966 2967 2968
		/* Use new_mysql_row_col and col_pack_len save the values */

		new_mysql_row_col = n_ptr;
unknown's avatar
unknown committed
2969
		col_pack_len = field->pack_length();
unknown's avatar
unknown committed
2970

unknown's avatar
unknown committed
2971 2972
		o_len = col_pack_len;
		n_len = col_pack_len;
2973

unknown's avatar
unknown committed
2974 2975 2976
		/* We use o_ptr and n_ptr to dig up the actual data for
		comparison. */ 

unknown's avatar
unknown committed
2977 2978
		field_mysql_type = field->type();
	
2979
		col_type = get_innobase_type_from_mysql_type(field);
2980
		is_unsigned = (ulint) (field->flags & UNSIGNED_FLAG);
2981 2982 2983 2984 2985 2986

		switch (col_type) {

		case DATA_BLOB:
			o_ptr = row_mysql_read_blob_ref(&o_len, o_ptr, o_len);
			n_ptr = row_mysql_read_blob_ref(&n_len, n_ptr, n_len);
unknown's avatar
unknown committed
2987

2988
			break;
unknown's avatar
unknown committed
2989

2990 2991 2992
		case DATA_VARCHAR:
		case DATA_BINARY:
		case DATA_VARMYSQL:
unknown's avatar
unknown committed
2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009
			if (field_mysql_type == MYSQL_TYPE_VARCHAR) {
				/* This is a >= 5.0.3 type true VARCHAR where
				the real payload data length is stored in
				1 or 2 bytes */
			
				o_ptr = row_mysql_read_true_varchar(
						&o_len, o_ptr,
				    (ulint)
				    (((Field_varstring*)field)->length_bytes));
								
				n_ptr = row_mysql_read_true_varchar(
						&n_len, n_ptr,
				    (ulint)
				    (((Field_varstring*)field)->length_bytes));
			}

			break;
3010 3011 3012
		default:
			;
		}
3013

3014
		if (field->null_ptr) {
unknown's avatar
unknown committed
3015 3016
			if (field_in_record_is_null(table, field,
							(char*) old_row)) {
3017 3018
				o_len = UNIV_SQL_NULL;
			}
3019

unknown's avatar
unknown committed
3020 3021
			if (field_in_record_is_null(table, field,
							(char*) new_row)) {
3022 3023 3024 3025 3026 3027 3028 3029 3030
				n_len = UNIV_SQL_NULL;
			}
		}

		if (o_len != n_len || (o_len != UNIV_SQL_NULL &&
					0 != memcmp(o_ptr, n_ptr, o_len))) {
			/* The field has changed */

			ufield = uvect->fields + n_changed;
unknown's avatar
unknown committed
3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041
	
			/* Let us use a dummy dfield to make the conversion
			from the MySQL column format to the InnoDB format */

			dfield.type = (prebuilt->table->cols + i)->type;

			if (n_len != UNIV_SQL_NULL) {
				buf = row_mysql_store_col_in_innobase_format(
						&dfield,
						(byte*)buf,
						TRUE,
unknown's avatar
unknown committed
3042
						new_mysql_row_col,
unknown's avatar
unknown committed
3043 3044
						col_pack_len,
						prebuilt->table->comp);
unknown's avatar
unknown committed
3045 3046
				ufield->new_val.data = dfield.data;
				ufield->new_val.len = dfield.len;
unknown's avatar
unknown committed
3047 3048 3049 3050
			} else {
				ufield->new_val.data = NULL;
				ufield->new_val.len = UNIV_SQL_NULL;
			}
3051 3052

			ufield->exp = NULL;
3053 3054
			ufield->field_no =
					(prebuilt->table->cols + i)->clust_pos;
3055 3056 3057 3058 3059 3060 3061
			n_changed++;
		}
	}

	uvect->n_fields = n_changed;
	uvect->info_bits = 0;

unknown's avatar
unknown committed
3062 3063
	ut_a(buf <= (byte*)original_upd_buff + buff_len);

3064 3065 3066 3067 3068 3069 3070
	return(0);
}

/**************************************************************************
Updates a row given as a parameter to a new value. Note that we are given
whole rows, not just the fields which are updated: this incurs some
overhead for CPU when we check which fields are actually updated.
3071
TODO: currently InnoDB does not prevent the 'Halloween problem':
3072 3073
in a searched update a single row can get updated several times
if its index columns are updated! */
3074

3075 3076 3077 3078
int
ha_innobase::update_row(
/*====================*/
					/* out: error number or 0 */
3079 3080
	const mysql_byte* 	old_row,/* in: old row in MySQL format */
	mysql_byte* 		new_row)/* in: new row in MySQL format */
3081 3082 3083 3084 3085
{
	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;
	upd_t*		uvect;
	int		error = 0;

3086
	DBUG_ENTER("ha_innobase::update_row");
3087

unknown's avatar
unknown committed
3088
	ut_ad(prebuilt->trx ==
3089
                (trx_t*) current_thd->ha_data[innobase_hton.slot]);
unknown's avatar
unknown committed
3090

3091 3092
        if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_UPDATE)
                table->timestamp_field->set_time();
3093

unknown's avatar
unknown committed
3094 3095 3096
	if (last_query_id != user_thd->query_id) {
	        prebuilt->sql_stat_start = TRUE;
                last_query_id = user_thd->query_id;
unknown's avatar
unknown committed
3097 3098

		innobase_release_stat_resources(prebuilt->trx);
unknown's avatar
unknown committed
3099 3100
	}

3101 3102 3103 3104 3105
	if (prebuilt->upd_node) {
		uvect = prebuilt->upd_node->update;
	} else {
		uvect = row_get_prebuilt_update_vector(prebuilt);
	}
3106 3107 3108 3109

	/* Build an update vector from the modified fields in the rows
	(uses upd_buff of the handle) */

3110
	calc_row_difference(uvect, (mysql_byte*) old_row, new_row, table,
unknown's avatar
unknown committed
3111 3112 3113
			upd_buff, (ulint)upd_and_key_val_buff_len,
			prebuilt, user_thd);

3114 3115 3116
	/* This is not a delete */
	prebuilt->upd_node->is_delete = FALSE;

unknown's avatar
unknown committed
3117
	assert(prebuilt->template_type == ROW_MYSQL_WHOLE_ROW);
3118

unknown's avatar
unknown committed
3119
	innodb_srv_conc_enter_innodb(prebuilt->trx);
unknown's avatar
Merge  
unknown committed
3120

3121
	error = row_update_for_mysql((byte*) old_row, prebuilt);
3122

unknown's avatar
unknown committed
3123
	innodb_srv_conc_exit_innodb(prebuilt->trx);
unknown's avatar
Merge  
unknown committed
3124

unknown's avatar
unknown committed
3125
	error = convert_error_code_to_mysql(error, user_thd);
3126

3127
	/* Tell InnoDB server that there might be work for
3128 3129
	utility threads: */

3130
	innobase_active_small();
3131 3132 3133 3134 3135 3136 3137 3138 3139 3140

	DBUG_RETURN(error);
}

/**************************************************************************
Deletes a row given as the parameter. */

int
ha_innobase::delete_row(
/*====================*/
3141 3142
					/* out: error number or 0 */
	const mysql_byte* record)	/* in: a row in MySQL format */
3143 3144 3145 3146
{
	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;
	int		error = 0;

3147
	DBUG_ENTER("ha_innobase::delete_row");
3148

unknown's avatar
unknown committed
3149
	ut_ad(prebuilt->trx ==
3150
                (trx_t*) current_thd->ha_data[innobase_hton.slot]);
unknown's avatar
unknown committed
3151

unknown's avatar
unknown committed
3152 3153 3154
	if (last_query_id != user_thd->query_id) {
	        prebuilt->sql_stat_start = TRUE;
                last_query_id = user_thd->query_id;
unknown's avatar
unknown committed
3155 3156

		innobase_release_stat_resources(prebuilt->trx);
unknown's avatar
unknown committed
3157 3158
	}

3159 3160 3161
	if (!prebuilt->upd_node) {
		row_get_prebuilt_update_vector(prebuilt);
	}
3162 3163

	/* This is a delete */
3164

3165
	prebuilt->upd_node->is_delete = TRUE;
3166

unknown's avatar
unknown committed
3167
	innodb_srv_conc_enter_innodb(prebuilt->trx);
unknown's avatar
Merge  
unknown committed
3168

3169
	error = row_update_for_mysql((byte*) record, prebuilt);
3170

unknown's avatar
unknown committed
3171
	innodb_srv_conc_exit_innodb(prebuilt->trx);
unknown's avatar
Merge  
unknown committed
3172

unknown's avatar
unknown committed
3173
	error = convert_error_code_to_mysql(error, user_thd);
3174

3175
	/* Tell the InnoDB server that there might be work for
3176 3177
	utility threads: */

3178
	innobase_active_small();
3179 3180 3181 3182

	DBUG_RETURN(error);
}

3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205
/**************************************************************************
Deletes a lock set to a row */

void
ha_innobase::unlock_row(void)
/*=========================*/
{
	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;

	DBUG_ENTER("ha_innobase::unlock_row");

	if (last_query_id != user_thd->query_id) {
		ut_print_timestamp(stderr);
		fprintf(stderr,
"  InnoDB: Error: last_query_id is %lu != user_thd_query_id is %lu\n",
			(ulong)last_query_id, (ulong)user_thd->query_id);
		mem_analyze_corruption((byte *) prebuilt->trx);
		ut_error;
	}

	row_unlock_for_mysql(prebuilt);
}

3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217
/**********************************************************************
Prepares a handle for using an index by making the given key the active
index. */

int
ha_innobase::index_init(
/*====================*/
			/* out: 0 or error number */
	uint 	keynr)	/* in: key (index) number */
{
	int	rc;

	DBUG_ENTER("index_init");

	rc = change_active_index(keynr);

	DBUG_RETURN(rc);
}

/**********************************************************************
3224
Currently does nothing. */
3225 3226 3227 3228 3229 3230 3231

int
ha_innobase::index_end(void)
/*========================*/
{
	int 	error	= 0;
  	DBUG_ENTER("index_end");
unknown's avatar
unknown committed
3232
        active_index=MAX_KEY;
3233 3234 3235 3236 3237
  	DBUG_RETURN(error);
}

/*************************************************************************
Converts a search mode flag understood by MySQL to a flag understood
3238
by InnoDB. */
3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252
inline
ulint
convert_search_mode_to_innobase(
/*============================*/
	enum ha_rkey_function	find_flag)
{
	switch (find_flag) {
  		case HA_READ_KEY_EXACT:		return(PAGE_CUR_GE);
  			/* the above does not require the index to be UNIQUE */
  		case HA_READ_KEY_OR_NEXT:	return(PAGE_CUR_GE);
		case HA_READ_KEY_OR_PREV:	return(PAGE_CUR_LE);
		case HA_READ_AFTER_KEY:		return(PAGE_CUR_G);
		case HA_READ_BEFORE_KEY:	return(PAGE_CUR_L);
		case HA_READ_PREFIX:		return(PAGE_CUR_GE);
unknown's avatar
unknown committed
3253
	        case HA_READ_PREFIX_LAST:       return(PAGE_CUR_LE);
unknown's avatar
unknown committed
3254 3255 3256
                case HA_READ_PREFIX_LAST_OR_PREV:return(PAGE_CUR_LE);
		  /* In MySQL-4.0 HA_READ_PREFIX and HA_READ_PREFIX_LAST always
		  pass a complete-field prefix of a key value as the search
unknown's avatar
unknown committed
3257 3258 3259 3260 3261
		  tuple. I.e., it is not allowed that the last field would
		  just contain n first bytes of the full field value.
		  MySQL uses a 'padding' trick to convert LIKE 'abc%'
		  type queries so that it can use as a search tuple
		  a complete-field-prefix of a key value. Thus, the InnoDB
unknown's avatar
unknown committed
3262 3263 3264 3265 3266 3267 3268
		  search mode PAGE_CUR_LE_OR_EXTENDS is never used.
		  TODO: when/if MySQL starts to use also partial-field
		  prefixes, we have to deal with stripping of spaces
		  and comparison of non-latin1 char type fields in
		  innobase_mysql_cmp() to get PAGE_CUR_LE_OR_EXTENDS to
		  work correctly. */

3269 3270 3271 3272 3273
		default:			assert(0);
	}

	return(0);
}
3274

unknown's avatar
unknown committed
3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323
/*
   BACKGROUND INFO: HOW A SELECT SQL QUERY IS EXECUTED
   ---------------------------------------------------
The following does not cover all the details, but explains how we determine
the start of a new SQL statement, and what is associated with it.

For each table in the database the MySQL interpreter may have several
table handle instances in use, also in a single SQL query. For each table
handle instance there is an InnoDB  'prebuilt' struct which contains most
of the InnoDB data associated with this table handle instance.

  A) if the user has not explicitly set any MySQL table level locks:

  1) MySQL calls ::external_lock to set an 'intention' table level lock on
the table of the handle instance. There we set
prebuilt->sql_stat_start = TRUE. The flag sql_stat_start should be set
true if we are taking this table handle instance to use in a new SQL
statement issued by the user. We also increment trx->n_mysql_tables_in_use.

  2) If prebuilt->sql_stat_start == TRUE we 'pre-compile' the MySQL search
instructions to prebuilt->template of the table handle instance in
::index_read. The template is used to save CPU time in large joins.

  3) In row_search_for_mysql, if prebuilt->sql_stat_start is true, we
allocate a new consistent read view for the trx if it does not yet have one,
or in the case of a locking read, set an InnoDB 'intention' table level
lock on the table.

  4) We do the SELECT. MySQL may repeatedly call ::index_read for the
same table handle instance, if it is a join.

  5) When the SELECT ends, MySQL removes its intention table level locks
in ::external_lock. When trx->n_mysql_tables_in_use drops to zero,
 (a) we execute a COMMIT there if the autocommit is on,
 (b) we also release possible 'SQL statement level resources' InnoDB may
have for this SQL statement. The MySQL interpreter does NOT execute
autocommit for pure read transactions, though it should. That is why the
table handler in that case has to execute the COMMIT in ::external_lock.

  B) If the user has explicitly set MySQL table level locks, then MySQL
does NOT call ::external_lock at the start of the statement. To determine
when we are at the start of a new SQL statement we at the start of
::index_read also compare the query id to the latest query id where the
table handle instance was used. If it has changed, we know we are at the
start of a new SQL statement. Since the query id can theoretically
wrap around, we use this test only as a secondary way of determining the
start of a new SQL statement. */


3324 3325 3326 3327 3328 3329 3330 3331 3332
/**************************************************************************
Positions an index cursor to the index specified in the handle. Fetches the
row if any. */

int
ha_innobase::index_read(
/*====================*/
					/* out: 0, HA_ERR_KEY_NOT_FOUND,
					or error number */
3333
	mysql_byte*		buf,	/* in/out: buffer for the returned
3334
					row */
3335
	const mysql_byte* 	key_ptr,/* in: key value; if this is NULL
3336
					we position the cursor at the
unknown's avatar
unknown committed
3337 3338 3339
					start or end of index; this can
					also contain an InnoDB row id, in
					which case key_len is the InnoDB
unknown's avatar
unknown committed
3340 3341 3342 3343
					row id length; the key value can
					also be a prefix of a full key value,
					and the last column can be a prefix
					of a full column */
3344
	uint			key_len,/* in: key value length */
3345 3346 3347 3348 3349 3350 3351 3352 3353 3354
	enum ha_rkey_function find_flag)/* in: search flags from my_base.h */
{
	row_prebuilt_t* prebuilt	= (row_prebuilt_t*) innobase_prebuilt;
	ulint		mode;
	dict_index_t*	index;
	ulint		match_mode 	= 0;
	int 		error;
	ulint		ret;

  	DBUG_ENTER("index_read");
unknown's avatar
unknown committed
3355

unknown's avatar
unknown committed
3356
	ut_ad(prebuilt->trx ==
3357
                (trx_t*) current_thd->ha_data[innobase_hton.slot]);
unknown's avatar
unknown committed
3358

3359 3360
  	statistic_increment(current_thd->status_var.ha_read_key_count,
			    &LOCK_status);
3361

unknown's avatar
unknown committed
3362 3363 3364
	if (last_query_id != user_thd->query_id) {
	        prebuilt->sql_stat_start = TRUE;
                last_query_id = user_thd->query_id;
unknown's avatar
unknown committed
3365 3366

		innobase_release_stat_resources(prebuilt->trx);
unknown's avatar
unknown committed
3367 3368
	}

3369
	index = prebuilt->index;
3370

unknown's avatar
unknown committed
3371 3372
	/* Note that if the index for which the search template is built is not
        necessarily prebuilt->index, but can also be the clustered index */
3373

3374 3375 3376 3377
	if (prebuilt->sql_stat_start) {
		build_template(prebuilt, user_thd, table,
							ROW_MYSQL_REC_FIELDS);
	}
3378 3379

	if (key_ptr) {
unknown's avatar
unknown committed
3380 3381 3382
	        /* Convert the search key value to InnoDB format into
		prebuilt->search_tuple */

3383
		row_sel_convert_mysql_key_to_innobase(prebuilt->search_tuple,
unknown's avatar
unknown committed
3384 3385 3386 3387
					(byte*) key_val_buff,
					(ulint)upd_and_key_val_buff_len,
					index,
					(byte*) key_ptr,
3388
					(ulint) key_len, prebuilt->trx);
3389 3390 3391 3392 3393 3394
	} else {
		/* We position the cursor to the last or the first entry
		in the index */

 		dtuple_set_n_fields(prebuilt->search_tuple, 0);
	}
3395

3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409
	mode = convert_search_mode_to_innobase(find_flag);

	match_mode = 0;

	if (find_flag == HA_READ_KEY_EXACT) {
		match_mode = ROW_SEL_EXACT;

	} else if (find_flag == HA_READ_PREFIX
				|| find_flag == HA_READ_PREFIX_LAST) {
		match_mode = ROW_SEL_EXACT_PREFIX;
	}

	last_match_mode = match_mode;

unknown's avatar
unknown committed
3410
	innodb_srv_conc_enter_innodb(prebuilt->trx);
unknown's avatar
Merge  
unknown committed
3411

unknown's avatar
unknown committed
3412
	ret = row_search_for_mysql((byte*) buf, mode, prebuilt, match_mode, 0);
3413

unknown's avatar
unknown committed
3414
	innodb_srv_conc_exit_innodb(prebuilt->trx);
unknown's avatar
Merge  
unknown committed
3415

3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427
	if (ret == DB_SUCCESS) {
		error = 0;
		table->status = 0;

	} else if (ret == DB_RECORD_NOT_FOUND) {
		error = HA_ERR_KEY_NOT_FOUND;
		table->status = STATUS_NOT_FOUND;

	} else if (ret == DB_END_OF_INDEX) {
		error = HA_ERR_KEY_NOT_FOUND;
		table->status = STATUS_NOT_FOUND;
	} else {
unknown's avatar
unknown committed
3428
		error = convert_error_code_to_mysql(ret, user_thd);
3429 3430
		table->status = STATUS_NOT_FOUND;
	}
3431

3432 3433 3434
	DBUG_RETURN(error);
}

unknown's avatar
unknown committed
3435 3436 3437
/***********************************************************************
The following functions works like index_read, but it find the last
row with the current key value or prefix. */
3438 3439

int
unknown's avatar
unknown committed
3440 3441 3442 3443 3444 3445 3446 3447 3448
ha_innobase::index_read_last(
/*=========================*/
			           /* out: 0, HA_ERR_KEY_NOT_FOUND, or an
				   error code */
        mysql_byte*       buf,     /* out: fetched row */
        const mysql_byte* key_ptr, /* in: key value, or a prefix of a full
				   key value */
	uint              key_len) /* in: length of the key val or prefix
				   in bytes */
3449
{
unknown's avatar
unknown committed
3450
        return(index_read(buf, key_ptr, key_len, HA_READ_PREFIX_LAST));
3451 3452
}

3453
/************************************************************************
unknown's avatar
unknown committed
3454
Changes the active index of a handle. */
3455 3456 3457 3458

int
ha_innobase::change_active_index(
/*=============================*/
3459 3460 3461
			/* out: 0 or error code */
	uint 	keynr)	/* in: use this index; MAX_KEY means always clustered
			index, even if it was internally generated by
3462
			InnoDB */
3463
{
unknown's avatar
unknown committed
3464 3465
	row_prebuilt_t* prebuilt	= (row_prebuilt_t*) innobase_prebuilt;
	KEY*		key=0;
3466 3467
	statistic_increment(current_thd->status_var.ha_read_key_count,
			    &LOCK_status);
unknown's avatar
unknown committed
3468
	DBUG_ENTER("change_active_index");
3469

unknown's avatar
unknown committed
3470 3471
	ut_ad(user_thd == current_thd);
	ut_ad(prebuilt->trx ==
3472
             (trx_t*) current_thd->ha_data[innobase_hton.slot]);
unknown's avatar
unknown committed
3473

unknown's avatar
unknown committed
3474
	active_index = keynr;
3475

3476
	if (keynr != MAX_KEY && table->s->keys > 0) {
unknown's avatar
unknown committed
3477
		key = table->key_info + active_index;
3478

unknown's avatar
unknown committed
3479
		prebuilt->index = dict_table_get_index_noninline(
unknown's avatar
unknown committed
3480 3481
						     prebuilt->table,
						     key->name);
unknown's avatar
unknown committed
3482 3483
        } else {
		prebuilt->index = dict_table_get_first_index_noninline(
unknown's avatar
unknown committed
3484
							   prebuilt->table);
unknown's avatar
unknown committed
3485
	}
3486

unknown's avatar
unknown committed
3487 3488 3489 3490 3491 3492
	if (!prebuilt->index) {
	       sql_print_error(
"Innodb could not find key n:o %u with name %s from dict cache for table %s",
	      keynr, key ? key->name : "NULL", prebuilt->table->name);
	      DBUG_RETURN(1);
	}
3493

unknown's avatar
unknown committed
3494
	assert(prebuilt->search_tuple != 0);
unknown's avatar
Merge  
unknown committed
3495

unknown's avatar
unknown committed
3496
	dtuple_set_n_fields(prebuilt->search_tuple, prebuilt->index->n_fields);
3497

unknown's avatar
unknown committed
3498
	dict_index_copy_types(prebuilt->search_tuple, prebuilt->index,
3499
			prebuilt->index->n_fields);
3500

unknown's avatar
unknown committed
3501 3502 3503 3504 3505
	/* MySQL changes the active index for a handle also during some
	queries, for example SELECT MAX(a), SUM(a) first retrieves the MAX()
	and then calculates the sum. Previously we played safe and used
	the flag ROW_MYSQL_WHOLE_ROW below, but that caused unnecessary
	copying. Starting from MySQL-4.1 we use a more efficient flag here. */
3506

unknown's avatar
unknown committed
3507
	build_template(prebuilt, user_thd, table, ROW_MYSQL_REC_FIELDS);
3508

unknown's avatar
unknown committed
3509
	DBUG_RETURN(0);
3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520
}

/**************************************************************************
Positions an index cursor to the index specified in keynr. Fetches the
row if any. */
/* ??? This is only used to read whole keys ??? */

int
ha_innobase::index_read_idx(
/*========================*/
					/* out: error number or 0 */
3521
	mysql_byte*	buf,		/* in/out: buffer for the returned
3522 3523
					row */
	uint 		keynr,		/* in: use this index */
3524
	const mysql_byte* key,		/* in: key value; if this is NULL
3525 3526 3527 3528 3529
					we position the cursor at the
					start or end of index */
	uint		key_len,	/* in: key value length */
	enum ha_rkey_function find_flag)/* in: search flags from my_base.h */
{
unknown's avatar
Merge  
unknown committed
3530 3531 3532 3533
	if (change_active_index(keynr)) {

		return(1);
	}
3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546

	return(index_read(buf, key, key_len, find_flag));
}

/***************************************************************************
Reads the next or previous row from a cursor, which must have previously been
positioned using index_read. */

int
ha_innobase::general_fetch(
/*=======================*/
				/* out: 0, HA_ERR_END_OF_FILE, or error
				number */
3547
	mysql_byte* 	buf,	/* in/out: buffer for next row in MySQL
3548 3549 3550 3551 3552 3553 3554 3555
				format */
	uint 	direction,	/* in: ROW_SEL_NEXT or ROW_SEL_PREV */
	uint	match_mode)	/* in: 0, ROW_SEL_EXACT, or
				ROW_SEL_EXACT_PREFIX */
{
	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;
	ulint		ret;
	int		error	= 0;
3556

3557
	DBUG_ENTER("general_fetch");
3558

unknown's avatar
unknown committed
3559
	ut_ad(prebuilt->trx ==
3560
             (trx_t*) current_thd->ha_data[innobase_hton.slot]);
unknown's avatar
unknown committed
3561

unknown's avatar
unknown committed
3562
	innodb_srv_conc_enter_innodb(prebuilt->trx);
unknown's avatar
unknown committed
3563

unknown's avatar
Merge  
unknown committed
3564 3565
	ret = row_search_for_mysql((byte*)buf, 0, prebuilt, match_mode,
								direction);
unknown's avatar
unknown committed
3566
	innodb_srv_conc_exit_innodb(prebuilt->trx);
3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579

	if (ret == DB_SUCCESS) {
		error = 0;
		table->status = 0;

	} else if (ret == DB_RECORD_NOT_FOUND) {
		error = HA_ERR_END_OF_FILE;
		table->status = STATUS_NOT_FOUND;

	} else if (ret == DB_END_OF_INDEX) {
		error = HA_ERR_END_OF_FILE;
		table->status = STATUS_NOT_FOUND;
	} else {
unknown's avatar
unknown committed
3580
		error = convert_error_code_to_mysql(ret, user_thd);
3581 3582
		table->status = STATUS_NOT_FOUND;
	}
3583

3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595
	DBUG_RETURN(error);
}

/***************************************************************************
Reads the next row from a cursor, which must have previously been
positioned using index_read. */

int
ha_innobase::index_next(
/*====================*/
				/* out: 0, HA_ERR_END_OF_FILE, or error
				number */
3596
	mysql_byte* 	buf)	/* in/out: buffer for next row in MySQL
3597 3598
				format */
{
3599 3600
  	statistic_increment(current_thd->status_var.ha_read_next_count,
			    &LOCK_status);
3601

3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612
	return(general_fetch(buf, ROW_SEL_NEXT, 0));
}

/***********************************************************************
Reads the next row matching to the key value given as the parameter. */

int
ha_innobase::index_next_same(
/*=========================*/
				/* out: 0, HA_ERR_END_OF_FILE, or error
				number */
3613 3614
	mysql_byte* 	buf,	/* in/out: buffer for the row */
	const mysql_byte* key,	/* in: key value */
3615 3616
	uint 		keylen)	/* in: key value length */
{
3617 3618
  	statistic_increment(current_thd->status_var.ha_read_next_count,
			    &LOCK_status);
3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631

	return(general_fetch(buf, ROW_SEL_NEXT, last_match_mode));
}

/***************************************************************************
Reads the previous row from a cursor, which must have previously been
positioned using index_read. */

int
ha_innobase::index_prev(
/*====================*/
				/* out: 0, HA_ERR_END_OF_FILE, or error
				number */
3632
	mysql_byte* 	buf)	/* in/out: buffer for previous row in MySQL
3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644
				format */
{
	return(general_fetch(buf, ROW_SEL_PREV, 0));
}

/************************************************************************
Positions a cursor on the first record in an index and reads the
corresponding row to buf. */

int
ha_innobase::index_first(
/*=====================*/
3645
				/* out: 0, HA_ERR_END_OF_FILE,
3646 3647
				or error code */
	mysql_byte*	buf)	/* in/out: buffer for the row */
3648 3649 3650 3651
{
	int	error;

  	DBUG_ENTER("index_first");
3652 3653
  	statistic_increment(current_thd->status_var.ha_read_first_count,
			    &LOCK_status);
3654 3655 3656

  	error = index_read(buf, NULL, 0, HA_READ_AFTER_KEY);

3657 3658 3659 3660 3661 3662
        /* MySQL does not seem to allow this to return HA_ERR_KEY_NOT_FOUND */

  	if (error == HA_ERR_KEY_NOT_FOUND) {
  		error = HA_ERR_END_OF_FILE;
  	}

3663 3664 3665 3666 3667 3668 3669 3670 3671 3672
  	DBUG_RETURN(error);
}

/************************************************************************
Positions a cursor on the last record in an index and reads the
corresponding row to buf. */

int
ha_innobase::index_last(
/*====================*/
3673 3674
				/* out: 0, HA_ERR_END_OF_FILE, or error code */
	mysql_byte*	buf)	/* in/out: buffer for the row */
3675 3676 3677
{
	int	error;

3678
  	DBUG_ENTER("index_last");
3679 3680
  	statistic_increment(current_thd->status_var.ha_read_last_count,
			    &LOCK_status);
3681 3682 3683

  	error = index_read(buf, NULL, 0, HA_READ_BEFORE_KEY);

3684
        /* MySQL does not seem to allow this to return HA_ERR_KEY_NOT_FOUND */
3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699

  	if (error == HA_ERR_KEY_NOT_FOUND) {
  		error = HA_ERR_END_OF_FILE;
  	}

  	DBUG_RETURN(error);
}

/********************************************************************
Initialize a table scan. */

int
ha_innobase::rnd_init(
/*==================*/
			/* out: 0 or error number */
3700
	bool	scan)	/* in: ???????? */
3701
{
unknown's avatar
Merge  
unknown committed
3702
	int	err;
unknown's avatar
unknown committed
3703

3704
	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;
3705

unknown's avatar
unknown committed
3706 3707 3708
	/* Store the active index value so that we can restore the original
	value after a scan */

3709
	if (prebuilt->clust_index_was_generated) {
unknown's avatar
Merge  
unknown committed
3710
		err = change_active_index(MAX_KEY);
3711
	} else {
unknown's avatar
Merge  
unknown committed
3712
		err = change_active_index(primary_key);
3713
	}
3714

3715
  	start_of_scan = 1;
3716

unknown's avatar
Merge  
unknown committed
3717
 	return(err);
3718 3719 3720
}

/*********************************************************************
unknown's avatar
unknown committed
3721
Ends a table scan. */
3722 3723 3724 3725 3726 3727

int
ha_innobase::rnd_end(void)
/*======================*/
				/* out: 0 or error number */
{
unknown's avatar
unknown committed
3728
	return(index_end());
3729 3730 3731 3732 3733 3734 3735 3736 3737 3738
}

/*********************************************************************
Reads the next row in a table scan (also used to read the FIRST row
in a table scan). */

int
ha_innobase::rnd_next(
/*==================*/
			/* out: 0, HA_ERR_END_OF_FILE, or error number */
3739
	mysql_byte* buf)/* in/out: returns the row in this buffer,
3740 3741
			in MySQL format */
{
3742
	int	error;
3743 3744

  	DBUG_ENTER("rnd_next");
3745 3746
  	statistic_increment(current_thd->status_var.ha_read_rnd_next_count,
			    &LOCK_status);
3747

3748
  	if (start_of_scan) {
3749 3750 3751 3752
		error = index_first(buf);
		if (error == HA_ERR_KEY_NOT_FOUND) {
			error = HA_ERR_END_OF_FILE;
		}
3753
		start_of_scan = 0;
3754
	} else {
3755
		error = general_fetch(buf, ROW_SEL_NEXT, 0);
3756
	}
3757

3758 3759 3760 3761
  	DBUG_RETURN(error);
}

/**************************************************************************
unknown's avatar
unknown committed
3762
Fetches a row from the table based on a row reference. */
3763

3764 3765 3766
int
ha_innobase::rnd_pos(
/*=================*/
3767 3768 3769
				/* out: 0, HA_ERR_KEY_NOT_FOUND,
				or error code */
	mysql_byte* 	buf,	/* in/out: buffer for the row */
unknown's avatar
unknown committed
3770 3771 3772 3773 3774
	mysql_byte*	pos)	/* in: primary key value of the row in the
				MySQL format, or the row id if the clustered
				index was internally generated by InnoDB;
				the length of data in pos has to be
				ref_length */
3775
{
3776 3777 3778
	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;
	int		error;
	uint		keynr	= active_index;
3779
	DBUG_ENTER("rnd_pos");
unknown's avatar
unknown committed
3780
	DBUG_DUMP("key", (char*) pos, ref_length);
unknown's avatar
unknown committed
3781

3782 3783
	statistic_increment(current_thd->status_var.ha_read_rnd_count,
			    &LOCK_status);
3784

unknown's avatar
unknown committed
3785
	ut_ad(prebuilt->trx ==
3786
                (trx_t*) current_thd->ha_data[innobase_hton.slot]);
unknown's avatar
unknown committed
3787

3788 3789 3790 3791
	if (prebuilt->clust_index_was_generated) {
		/* No primary key was defined for the table and we
		generated the clustered index from the row id: the
		row reference is the row id, not any key value
unknown's avatar
unknown committed
3792
		that MySQL knows of */
3793

unknown's avatar
Merge  
unknown committed
3794
		error = change_active_index(MAX_KEY);
3795
	} else {
unknown's avatar
Merge  
unknown committed
3796
		error = change_active_index(primary_key);
3797
	}
3798

unknown's avatar
Merge  
unknown committed
3799
	if (error) {
unknown's avatar
unknown committed
3800
	        DBUG_PRINT("error", ("Got error: %ld", error));
unknown's avatar
Merge  
unknown committed
3801 3802
		DBUG_RETURN(error);
	}
unknown's avatar
unknown committed
3803

unknown's avatar
unknown committed
3804 3805 3806 3807
	/* Note that we assume the length of the row reference is fixed
        for the table, and it is == ref_length */

	error = index_read(buf, pos, ref_length, HA_READ_KEY_EXACT);
unknown's avatar
unknown committed
3808 3809 3810

	if (error) {
		DBUG_PRINT("error", ("Got error: %ld", error));
unknown's avatar
unknown committed
3811
	}
unknown's avatar
unknown committed
3812

3813
	change_active_index(keynr);
3814

3815 3816 3817 3818
  	DBUG_RETURN(error);
}

/*************************************************************************
3819
Stores a reference to the current row to 'ref' field of the handle. Note
unknown's avatar
unknown committed
3820 3821
that in the case where we have generated the clustered index for the
table, the function parameter is illogical: we MUST ASSUME that 'record'
unknown's avatar
unknown committed
3822
is the current 'position' of the handle, because if row ref is actually
3823
the row id internally generated in InnoDB, then 'record' does not contain
3824 3825
it. We just guess that the row id must be for the record where the handle
was positioned the last time. */
3826 3827 3828 3829

void
ha_innobase::position(
/*==================*/
3830
	const mysql_byte*	record)	/* in: row in MySQL format */
3831
{
3832 3833
	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;
	uint		len;
3834

unknown's avatar
unknown committed
3835
	ut_ad(prebuilt->trx ==
3836
                (trx_t*) current_thd->ha_data[innobase_hton.slot]);
unknown's avatar
unknown committed
3837

3838 3839 3840 3841
	if (prebuilt->clust_index_was_generated) {
		/* No primary key was defined for the table and we
		generated the clustered index from row id: the
		row reference will be the row id, not any key value
unknown's avatar
unknown committed
3842
		that MySQL knows of */
3843 3844 3845 3846 3847

		len = DATA_ROW_ID_LEN;

		memcpy(ref, prebuilt->row_id, len);
	} else {
3848 3849
		len = store_key_val_for_row(primary_key, (char*)ref,
							 ref_length, record);
3850
	}
3851

unknown's avatar
unknown committed
3852 3853
	/* We assume that the 'ref' value len is always fixed for the same
	table. */
unknown's avatar
unknown committed
3854
  
unknown's avatar
unknown committed
3855
	if (len != ref_length) {
unknown's avatar
unknown committed
3856
		fprintf(stderr,
unknown's avatar
unknown committed
3857
	 "InnoDB: Error: stored ref len is %lu, but table ref len is %lu\n",
3858
		  (ulong)len, (ulong)ref_length);
unknown's avatar
unknown committed
3859
	}
3860 3861 3862
}

/*********************************************************************
3863
Creates a table definition to an InnoDB database. */
3864 3865 3866 3867
static
int
create_table_def(
/*=============*/
3868
	trx_t*		trx,		/* in: InnoDB transaction handle */
3869 3870
	TABLE*		form,		/* in: information on table
					columns and indexes */
unknown's avatar
unknown committed
3871
	const char*	table_name,	/* in: table name */
unknown's avatar
unknown committed
3872
	const char*	path_of_temp_table,/* in: if this is a table explicitly
unknown's avatar
unknown committed
3873 3874 3875 3876 3877 3878 3879
					created by the user with the
					TEMPORARY keyword, then this
					parameter is the dir path where the
					table should be placed if we create
					an .ibd file for it (no .ibd extension
					in the path, though); otherwise this
					is NULL */
unknown's avatar
unknown committed
3880
	ibool		comp)		/* in: TRUE=compact record format */
3881 3882 3883 3884 3885 3886
{
	Field*		field;
	dict_table_t*	table;
	ulint		n_cols;
  	int 		error;
  	ulint		col_type;
unknown's avatar
unknown committed
3887
	ulint		col_len;
3888 3889
  	ulint		nulls_allowed;
	ulint		unsigned_type;
unknown's avatar
unknown committed
3890
	ulint		binary_type;
unknown's avatar
unknown committed
3891
	ulint		long_true_varchar;
unknown's avatar
unknown committed
3892
	ulint		charset_no;
3893
  	ulint		i;
3894

3895 3896 3897
  	DBUG_ENTER("create_table_def");
  	DBUG_PRINT("enter", ("table_name: %s", table_name));

3898
	n_cols = form->s->fields;
3899

unknown's avatar
unknown committed
3900 3901
	/* We pass 0 as the space id, and determine at a lower level the space
	id where to store the table */
3902

unknown's avatar
unknown committed
3903
	table = dict_mem_table_create(table_name, 0, n_cols, comp);
3904

unknown's avatar
unknown committed
3905 3906 3907 3908 3909
	if (path_of_temp_table) {
		table->dir_path_of_temp_table =
			mem_heap_strdup(table->heap, path_of_temp_table);
	}

3910 3911 3912
	for (i = 0; i < n_cols; i++) {
		field = form->field[i];

3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924
		col_type = get_innobase_type_from_mysql_type(field);
		if (field->null_ptr) {
			nulls_allowed = 0;
		} else {
			nulls_allowed = DATA_NOT_NULL;
		}

		if (field->flags & UNSIGNED_FLAG) {
			unsigned_type = DATA_UNSIGNED;
		} else {
			unsigned_type = 0;
		}
3925

unknown's avatar
unknown committed
3926
		if (field->binary()) {
unknown's avatar
unknown committed
3927 3928 3929 3930 3931
			binary_type = DATA_BINARY_TYPE;
		} else {
			binary_type = 0;
		}

unknown's avatar
unknown committed
3932 3933 3934 3935 3936 3937
		charset_no = 0;	

		if (dtype_is_string_type(col_type)) {

			charset_no = (ulint)field->charset()->number;

unknown's avatar
unknown committed
3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959
			ut_a(charset_no < 256); /* in data0type.h we assume
						that the number fits in one
						byte */
		}

		ut_a(field->type() < 256); /* we assume in dtype_form_prtype()
					   that this fits in one byte */
		col_len = field->pack_length();

		/* The MySQL pack length contains 1 or 2 bytes length field
		for a true VARCHAR. Let us subtract that, so that the InnoDB
		column length in the InnoDB data dictionary is the real
		maximum byte length of the actual data. */
	
		long_true_varchar = 0;

		if (field->type() == MYSQL_TYPE_VARCHAR) {
			col_len -= ((Field_varstring*)field)->length_bytes;

			if (((Field_varstring*)field)->length_bytes == 2) {
				long_true_varchar = DATA_LONG_TRUE_VARCHAR;
			}
unknown's avatar
unknown committed
3960 3961
		}

unknown's avatar
unknown committed
3962 3963 3964 3965 3966 3967 3968 3969 3970 3971
		dict_mem_table_add_col(table,
					(char*) field->field_name,
					col_type,
					dtype_form_prtype( 
					    (ulint)field->type()
					     | nulls_allowed | unsigned_type
					     | binary_type | long_true_varchar,
					    charset_no),
					col_len,
					0);
3972 3973 3974 3975
	}

	error = row_create_table_for_mysql(table, trx);

unknown's avatar
unknown committed
3976
	error = convert_error_code_to_mysql(error, NULL);
3977 3978 3979 3980 3981

	DBUG_RETURN(error);
}

/*********************************************************************
3982
Creates an index in an InnoDB database. */
3983 3984
static
int
3985 3986
create_index(
/*=========*/
3987
	trx_t*		trx,		/* in: InnoDB transaction handle */
3988 3989 3990 3991 3992
	TABLE*		form,		/* in: information on table
					columns and indexes */
	const char*	table_name,	/* in: table name */
	uint		key_num)	/* in: index number */
{
unknown's avatar
unknown committed
3993
	Field*		field;
3994
	dict_index_t*	index;
3995
  	int 		error;
3996 3997 3998 3999
	ulint		n_fields;
	KEY*		key;
	KEY_PART_INFO*	key_part;
	ulint		ind_type;
unknown's avatar
unknown committed
4000 4001
	ulint		col_type;
	ulint		prefix_len;
4002
  	ulint		i;
unknown's avatar
unknown committed
4003
  	ulint		j;
4004

4005
  	DBUG_ENTER("create_index");
4006

4007 4008 4009
	key = form->key_info + key_num;

    	n_fields = key->key_parts;
4010

4011 4012
    	ind_type = 0;

4013
    	if (key_num == form->s->primary_key) {
4014 4015
		ind_type = ind_type | DICT_CLUSTERED;
	}
4016

4017 4018 4019 4020
	if (key->flags & HA_NOSAME ) {
		ind_type = ind_type | DICT_UNIQUE;
	}

unknown's avatar
unknown committed
4021 4022
	/* We pass 0 as the space id, and determine at a lower level the space
	id where to store the table */
4023 4024 4025 4026 4027 4028

	index = dict_mem_index_create((char*) table_name, key->name, 0,
						ind_type, n_fields);
	for (i = 0; i < n_fields; i++) {
		key_part = key->key_part + i;

4029
		/* (The flag HA_PART_KEY_SEG denotes in MySQL a column prefix
unknown's avatar
unknown committed
4030 4031 4032 4033 4034 4035
		field in an index: we only store a specified number of first
		bytes of the column to the index field.) The flag does not
		seem to be properly set by MySQL. Let us fall back on testing
		the length of the key part versus the column. */
		
		field = NULL;
4036
		for (j = 0; j < form->s->fields; j++) {
unknown's avatar
unknown committed
4037 4038 4039

			field = form->field[j];

4040 4041 4042
			if (0 == innobase_strcasecmp(
					field->field_name,
					key_part->field->field_name)) {
unknown's avatar
unknown committed
4043 4044 4045 4046 4047 4048
				/* Found the corresponding column */

				break;
			}
		}

4049
		ut_a(j < form->s->fields);
unknown's avatar
unknown committed
4050 4051 4052 4053

		col_type = get_innobase_type_from_mysql_type(key_part->field);

		if (DATA_BLOB == col_type
unknown's avatar
unknown committed
4054 4055 4056 4057 4058
		    || (key_part->length < field->pack_length()
			&& field->type() != MYSQL_TYPE_VARCHAR)
		    || (field->type() == MYSQL_TYPE_VARCHAR
			&& key_part->length < field->pack_length()
			          - ((Field_varstring*)field)->length_bytes)) {
unknown's avatar
unknown committed
4059

unknown's avatar
unknown committed
4060 4061 4062 4063 4064 4065 4066 4067
		        prefix_len = key_part->length;

			if (col_type == DATA_INT
			    || col_type == DATA_FLOAT
			    || col_type == DATA_DOUBLE
			    || col_type == DATA_DECIMAL) {
			        fprintf(stderr,
"InnoDB: error: MySQL is trying to create a column prefix index field\n"
unknown's avatar
unknown committed
4068 4069
"InnoDB: on an inappropriate data type. Table name %s, column name %s.\n",
				  table_name, key_part->field->field_name);
unknown's avatar
unknown committed
4070 4071 4072 4073 4074
			        
			        prefix_len = 0;
			}
		} else {
		        prefix_len = 0;
unknown's avatar
unknown committed
4075 4076
		}

4077 4078
		/* We assume all fields should be sorted in ascending
		order, hence the '0': */
unknown's avatar
unknown committed
4079

4080
		dict_mem_index_add_field(index,
unknown's avatar
unknown committed
4081 4082
				(char*) key_part->field->field_name,
				0, prefix_len);
4083 4084 4085 4086
	}

	error = row_create_index_for_mysql(index, trx);

unknown's avatar
unknown committed
4087
	error = convert_error_code_to_mysql(error, NULL);
4088 4089 4090 4091 4092

	DBUG_RETURN(error);
}

/*********************************************************************
4093
Creates an index to an InnoDB table when the user has defined no
4094
primary index. */
4095 4096
static
int
4097 4098
create_clustered_index_when_no_primary(
/*===================================*/
4099
	trx_t*		trx,		/* in: InnoDB transaction handle */
4100 4101 4102
	const char*	table_name)	/* in: table name */
{
	dict_index_t*	index;
4103 4104
  	int 		error;

unknown's avatar
unknown committed
4105 4106
	/* We pass 0 as the space id, and determine at a lower level the space
	id where to store the table */
4107

unknown's avatar
unknown committed
4108 4109 4110
	index = dict_mem_index_create((char*) table_name,
				      (char*) "GEN_CLUST_INDEX",
				      0, DICT_CLUSTERED, 0);
4111 4112
	error = row_create_index_for_mysql(index, trx);

unknown's avatar
unknown committed
4113
	error = convert_error_code_to_mysql(error, NULL);
4114

4115
	return(error);
4116 4117 4118
}

/*********************************************************************
4119
Creates a new table to an InnoDB database. */
4120 4121 4122 4123 4124 4125 4126 4127

int
ha_innobase::create(
/*================*/
					/* out: error number */
	const char*	name,		/* in: table name */
	TABLE*		form,		/* in: information on table
					columns and indexes */
4128 4129 4130
	HA_CREATE_INFO*	create_info)	/* in: more information of the
					created table, contains also the
					create statement string */
4131 4132 4133
{
	int		error;
	dict_table_t*	innobase_table;
unknown's avatar
unknown committed
4134
	trx_t*		parent_trx;
4135
	trx_t*		trx;
unknown's avatar
unknown committed
4136
	int		primary_key_no;
4137
	uint		i;
unknown's avatar
unknown committed
4138 4139
	char		name2[FN_REFLEN];
	char		norm_name[FN_REFLEN];
unknown's avatar
unknown committed
4140
	THD		*thd= current_thd;
4141
	ib_longlong     auto_inc_value;
4142

4143 4144
  	DBUG_ENTER("ha_innobase::create");

unknown's avatar
unknown committed
4145
	DBUG_ASSERT(thd != NULL);
unknown's avatar
unknown committed
4146

4147
	if (form->s->fields > 1000) {
unknown's avatar
unknown committed
4148 4149 4150
		/* The limit probably should be REC_MAX_N_FIELDS - 3 = 1020,
		but we play safe here */

unknown's avatar
unknown committed
4151
	        DBUG_RETURN(HA_ERR_TO_BIG_ROW);
unknown's avatar
unknown committed
4152 4153
	} 

unknown's avatar
unknown committed
4154 4155 4156
	/* Get the transaction associated with the current thd, or create one
	if not yet created */
	
unknown's avatar
unknown committed
4157
	parent_trx = check_trx_exists(current_thd);
unknown's avatar
unknown committed
4158 4159 4160 4161 4162 4163

	/* In case MySQL calls this in the middle of a SELECT query, release
	possible adaptive hash latch to avoid deadlocks of threads */

	trx_search_latch_release_if_reserved(parent_trx);	
	
4164
	trx = trx_allocate_for_mysql();
unknown's avatar
unknown committed
4165 4166 4167
		
	trx->mysql_thd = thd;
	trx->mysql_query_str = &((*thd).query);
4168

unknown's avatar
unknown committed
4169
	if (thd->options & OPTION_NO_FOREIGN_KEY_CHECKS) {
unknown's avatar
unknown committed
4170 4171 4172
		trx->check_foreigns = FALSE;
	}

unknown's avatar
unknown committed
4173
	if (thd->options & OPTION_RELAXED_UNIQUE_CHECKS) {
unknown's avatar
unknown committed
4174 4175 4176
		trx->check_unique_secondary = FALSE;
	}

unknown's avatar
unknown committed
4177 4178 4179 4180 4181
	if (lower_case_table_names) {
		srv_lower_case_table_names = TRUE;
	} else {
		srv_lower_case_table_names = FALSE;
	}
unknown's avatar
unknown committed
4182

unknown's avatar
unknown committed
4183
	fn_format(name2, name, "", "", 2);	// Remove the .frm extension
4184 4185

	normalize_table_name(norm_name, name2);
4186

unknown's avatar
unknown committed
4187
	/* Latch the InnoDB data dictionary exclusively so that no deadlocks
unknown's avatar
unknown committed
4188
	or lock waits can happen in it during a table create operation.
unknown's avatar
unknown committed
4189
	Drop table etc. do this latching in row0mysql.c. */
unknown's avatar
unknown committed
4190

unknown's avatar
unknown committed
4191
	row_mysql_lock_data_dictionary(trx);
unknown's avatar
unknown committed
4192 4193

	/* Create the table definition in InnoDB */
4194

unknown's avatar
unknown committed
4195 4196
	error = create_table_def(trx, form, norm_name,
		create_info->options & HA_LEX_CREATE_TMP_TABLE ? name2 : NULL,
4197
		form->s->row_type != ROW_TYPE_REDUNDANT);
unknown's avatar
unknown committed
4198

unknown's avatar
unknown committed
4199
  	if (error) {
unknown's avatar
unknown committed
4200
		innobase_commit_low(trx);
4201

unknown's avatar
unknown committed
4202
		row_mysql_unlock_data_dictionary(trx);
4203 4204 4205 4206 4207 4208

  		trx_free_for_mysql(trx);

 		DBUG_RETURN(error);
 	}

4209 4210
	/* Look for a primary key */

4211 4212
	primary_key_no= (table->s->primary_key != MAX_KEY ?
			 (int) table->s->primary_key : 
unknown's avatar
unknown committed
4213
			 -1);
4214

4215 4216 4217
	/* Our function row_get_mysql_key_number_for_index assumes
	the primary key is always number 0, if it exists */

unknown's avatar
unknown committed
4218
	DBUG_ASSERT(primary_key_no == -1 || primary_key_no == 0);
4219

4220 4221
	/* Create the keys */

4222
	if (form->s->keys == 0 || primary_key_no == -1) {
4223 4224
		/* Create an index which is used as the clustered index;
		order the rows by their row id which is internally generated
4225
		by InnoDB */
4226

4227
		error = create_clustered_index_when_no_primary(trx,
4228
							norm_name);
4229
  		if (error) {
unknown's avatar
unknown committed
4230 4231
			innobase_commit_low(trx);

unknown's avatar
unknown committed
4232
			row_mysql_unlock_data_dictionary(trx);
4233

4234 4235 4236 4237
			trx_free_for_mysql(trx);

			DBUG_RETURN(error);
      		}
4238 4239 4240
	}

	if (primary_key_no != -1) {
4241
		/* In InnoDB the clustered index must always be created
4242
		first */
unknown's avatar
unknown committed
4243 4244
	    	if ((error = create_index(trx, form, norm_name,
					  (uint) primary_key_no))) {
unknown's avatar
unknown committed
4245 4246
			innobase_commit_low(trx);

unknown's avatar
unknown committed
4247
			row_mysql_unlock_data_dictionary(trx);
4248 4249 4250 4251 4252 4253 4254

  			trx_free_for_mysql(trx);

			DBUG_RETURN(error);
      		}
      	}

4255
	for (i = 0; i < form->s->keys; i++) {
4256 4257 4258

		if (i != (uint) primary_key_no) {

unknown's avatar
unknown committed
4259
    			if ((error = create_index(trx, form, norm_name, i))) {
4260

unknown's avatar
unknown committed
4261
			  	innobase_commit_low(trx);
4262

unknown's avatar
unknown committed
4263
				row_mysql_unlock_data_dictionary(trx);
4264 4265 4266 4267 4268

  				trx_free_for_mysql(trx);

				DBUG_RETURN(error);
      			}
4269
      		}
4270
  	}
4271

unknown's avatar
unknown committed
4272
	if (current_thd->query != NULL) {
unknown's avatar
unknown committed
4273
		LEX_STRING q;
unknown's avatar
unknown committed
4274

unknown's avatar
unknown committed
4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285
		if (thd->convert_string(&q, system_charset_info,
					current_thd->query,
					current_thd->query_length,
					current_thd->charset())) {
			error = HA_ERR_OUT_OF_MEM;
		} else {
			error = row_table_add_foreign_constraints(trx,
					q.str, norm_name);

			error = convert_error_code_to_mysql(error, NULL);
		}
4286

4287 4288
		if (error) {
			innobase_commit_low(trx);
unknown's avatar
unknown committed
4289

4290
			row_mysql_unlock_data_dictionary(trx);
4291

4292
  			trx_free_for_mysql(trx);
4293

4294 4295
			DBUG_RETURN(error);
		}
4296 4297
	}

unknown's avatar
unknown committed
4298 4299
  	innobase_commit_low(trx);

unknown's avatar
unknown committed
4300
	row_mysql_unlock_data_dictionary(trx);
4301

unknown's avatar
Merge  
unknown committed
4302 4303 4304
	/* Flush the log to reduce probability that the .frm files and
	the InnoDB data dictionary get out-of-sync if the user runs
	with innodb_flush_log_at_trx_commit = 0 */
unknown's avatar
unknown committed
4305

unknown's avatar
unknown committed
4306
	log_buffer_flush_to_disk();
unknown's avatar
Merge  
unknown committed
4307

4308
	innobase_table = dict_table_get(norm_name, NULL);
4309

unknown's avatar
unknown committed
4310
	DBUG_ASSERT(innobase_table != 0);
4311

4312 4313
	if ((create_info->used_fields & HA_CREATE_USED_AUTO) &&
	   (create_info->auto_increment_value != 0)) {
4314

4315 4316
		/* Query was ALTER TABLE...AUTO_INCREMENT = x; or 
		CREATE TABLE ...AUTO_INCREMENT = x; Find out a table
4317 4318
		definition from the dictionary and get the current value
		of the auto increment field. Set a new value to the
4319 4320
		auto increment field if the value is greater than the
		maximum value in the column. */
4321

4322
		auto_inc_value = create_info->auto_increment_value;
4323 4324 4325
		dict_table_autoinc_initialize(innobase_table, auto_inc_value);
	}

4326
	/* Tell the InnoDB server that there might be work for
4327 4328 4329 4330 4331 4332 4333 4334 4335
	utility threads: */

	srv_active_wake_master_thread();

  	trx_free_for_mysql(trx);

	DBUG_RETURN(0);
}

unknown's avatar
unknown committed
4336 4337 4338 4339 4340 4341 4342 4343 4344 4345
/*********************************************************************
Discards or imports the tablespace of the InnoDB table of this handle,
using the transaction stored in the prebuilt struct. */

int
ha_innobase::discard_or_import_tablespace(
/*======================================*/
				/* out: error number, as converted by
				convert_error_code_to_mysql() */
	my_bool discard)	/* in: TRUE if discard, else import */
{
	row_prebuilt_t* prebuilt	= (row_prebuilt_t*) innobase_prebuilt;
	trx_t*		trx;
	const char*	table_name;
	int		err;

 	DBUG_ENTER("ha_innobase::discard_or_import_tablespace");

	/* The prebuilt struct must carry a valid transaction, and it must
	be the one registered for the current thd */

	ut_a(prebuilt->trx && prebuilt->trx->magic_n == TRX_MAGIC_N);
	ut_a(prebuilt->trx ==
                (trx_t*) current_thd->ha_data[innobase_hton.slot]);

	trx = prebuilt->trx;
	table_name = prebuilt->table->name;

	err = discard
		? row_discard_tablespace_for_mysql(table_name, trx)
		: row_import_tablespace_for_mysql(table_name, trx);

	DBUG_RETURN(convert_error_code_to_mysql(err, NULL));
}

4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409
/*********************************************************************
Deletes all rows of an InnoDB table. */

int
ha_innobase::delete_all_rows(void)
/*==============================*/
				/* out: error number */
{
	row_prebuilt_t*	prebuilt	= (row_prebuilt_t*)innobase_prebuilt;
	int		error;
	trx_t*		trx;
	THD*		thd		= current_thd;

	DBUG_ENTER("ha_innobase::delete_all_rows");

	if (thd->lex->sql_command != SQLCOM_TRUNCATE) {
	fallback:
		/* We only handle TRUNCATE TABLE t as a special case.
		DELETE FROM t will have to use ha_innobase::delete_row(). */
		DBUG_RETURN(my_errno=HA_ERR_WRONG_COMMAND);
	}

	/* Get the transaction associated with the current thd, or create one
	if not yet created */

	trx = check_trx_exists(thd);

	/* Truncate the table in InnoDB */

	error = row_truncate_table_for_mysql(prebuilt->table, trx);
	if (error == DB_ERROR) {
		/* Cannot truncate; resort to ha_innobase::delete_row() */
		goto fallback;
	}

	error = convert_error_code_to_mysql(error, NULL);

	DBUG_RETURN(error);
}

4410
/*********************************************************************
4411
Drops a table from an InnoDB database. Before calling this function,
unknown's avatar
unknown committed
4412 4413
MySQL calls innobase_commit to commit the transaction of the current user.
Then the current user cannot have locks set on the table. Drop table
4414 4415
operation inside InnoDB will remove all locks any user has on the table
inside InnoDB. */
4416 4417 4418 4419

int
ha_innobase::delete_table(
/*======================*/
unknown's avatar
unknown committed
4420 4421
				/* out: error number */
	const char*	name)	/* in: table name */
4422 4423 4424
{
	ulint	name_len;
	int	error;
unknown's avatar
unknown committed
4425
	trx_t*	parent_trx;
4426
	trx_t*	trx;
unknown's avatar
unknown committed
4427
	THD     *thd= current_thd;
4428
	char	norm_name[1000];
unknown's avatar
unknown committed
4429

unknown's avatar
unknown committed
4430
 	DBUG_ENTER("ha_innobase::delete_table");
4431

unknown's avatar
unknown committed
4432 4433 4434
	/* Get the transaction associated with the current thd, or create one
	if not yet created */
	
unknown's avatar
unknown committed
4435
	parent_trx = check_trx_exists(current_thd);
unknown's avatar
unknown committed
4436 4437 4438 4439 4440 4441

	/* In case MySQL calls this in the middle of a SELECT query, release
	possible adaptive hash latch to avoid deadlocks of threads */

	trx_search_latch_release_if_reserved(parent_trx);	

unknown's avatar
unknown committed
4442 4443 4444 4445 4446 4447
	if (lower_case_table_names) {
		srv_lower_case_table_names = TRUE;
	} else {
		srv_lower_case_table_names = FALSE;
	}

4448 4449
	trx = trx_allocate_for_mysql();

unknown's avatar
unknown committed
4450 4451
	trx->mysql_thd = current_thd;
	trx->mysql_query_str = &((*current_thd).query);
unknown's avatar
unknown committed
4452

unknown's avatar
unknown committed
4453 4454 4455 4456 4457 4458 4459 4460
	if (thd->options & OPTION_NO_FOREIGN_KEY_CHECKS) {
		trx->check_foreigns = FALSE;
	}

	if (thd->options & OPTION_RELAXED_UNIQUE_CHECKS) {
		trx->check_unique_secondary = FALSE;
	}

4461 4462 4463
	name_len = strlen(name);

	assert(name_len < 1000);
4464

4465 4466
	/* Strangely, MySQL passes the table name without the '.frm'
	extension, in contrast to ::create */
4467

4468 4469
	normalize_table_name(norm_name, name);

4470
  	/* Drop the table in InnoDB */
4471

4472
	error = row_drop_table_for_mysql(norm_name, trx,
unknown's avatar
unknown committed
4473
		thd->lex->sql_command == SQLCOM_DROP_DB);
4474

unknown's avatar
Merge  
unknown committed
4475 4476 4477
	/* Flush the log to reduce probability that the .frm files and
	the InnoDB data dictionary get out-of-sync if the user runs
	with innodb_flush_log_at_trx_commit = 0 */
unknown's avatar
unknown committed
4478

unknown's avatar
unknown committed
4479
	log_buffer_flush_to_disk();
unknown's avatar
Merge  
unknown committed
4480

4481
	/* Tell the InnoDB server that there might be work for
4482 4483 4484 4485
	utility threads: */

	srv_active_wake_master_thread();

unknown's avatar
unknown committed
4486
  	innobase_commit_low(trx);
unknown's avatar
unknown committed
4487

4488 4489
  	trx_free_for_mysql(trx);

unknown's avatar
unknown committed
4490
	error = convert_error_code_to_mysql(error, NULL);
4491 4492 4493 4494

	DBUG_RETURN(error);
}

4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507
/*********************************************************************
Removes all tables in the named database inside InnoDB. */

int
innobase_drop_database(
/*===================*/
			/* out: error number */
	char*	path)	/* in: database path; inside InnoDB the name
			of the last directory in the path is used as
			the database name: for example, in 'mysql/data/test'
			the database name is 'test' */
{
	ulint	len		= 0;
unknown's avatar
unknown committed
4508
	trx_t*	parent_trx;
4509 4510 4511
	trx_t*	trx;
	char*	ptr;
	int	error;
4512
	char*	namebuf;
unknown's avatar
unknown committed
4513

unknown's avatar
unknown committed
4514 4515 4516
	/* Get the transaction associated with the current thd, or create one
	if not yet created */
	
unknown's avatar
unknown committed
4517
	parent_trx = check_trx_exists(current_thd);
unknown's avatar
unknown committed
4518 4519 4520 4521 4522 4523

	/* In case MySQL calls this in the middle of a SELECT query, release
	possible adaptive hash latch to avoid deadlocks of threads */

	trx_search_latch_release_if_reserved(parent_trx);	

4524
	ptr = strend(path) - 2;
unknown's avatar
unknown committed
4525

4526 4527 4528 4529 4530 4531
	while (ptr >= path && *ptr != '\\' && *ptr != '/') {
		ptr--;
		len++;
	}

	ptr++;
4532
	namebuf = my_malloc(len + 2, MYF(0));
4533 4534 4535 4536

	memcpy(namebuf, ptr, len);
	namebuf[len] = '/';
	namebuf[len + 1] = '\0';
unknown's avatar
unknown committed
4537
#ifdef  __WIN__
4538
	innobase_casedn_str(namebuf);
unknown's avatar
unknown committed
4539
#endif
4540
	trx = trx_allocate_for_mysql();
unknown's avatar
unknown committed
4541 4542
	trx->mysql_thd = current_thd;
	trx->mysql_query_str = &((*current_thd).query);
4543

unknown's avatar
unknown committed
4544 4545 4546 4547
	if (current_thd->options & OPTION_NO_FOREIGN_KEY_CHECKS) {
		trx->check_foreigns = FALSE;
	}

4548
  	error = row_drop_database_for_mysql(namebuf, trx);
4549
	my_free(namebuf, MYF(0));
4550

unknown's avatar
Merge  
unknown committed
4551 4552 4553
	/* Flush the log to reduce probability that the .frm files and
	the InnoDB data dictionary get out-of-sync if the user runs
	with innodb_flush_log_at_trx_commit = 0 */
unknown's avatar
unknown committed
4554

unknown's avatar
unknown committed
4555
	log_buffer_flush_to_disk();
unknown's avatar
Merge  
unknown committed
4556

4557 4558 4559 4560 4561
	/* Tell the InnoDB server that there might be work for
	utility threads: */

	srv_active_wake_master_thread();

unknown's avatar
unknown committed
4562
  	innobase_commit_low(trx);
4563 4564
  	trx_free_for_mysql(trx);

unknown's avatar
unknown committed
4565
	error = convert_error_code_to_mysql(error, NULL);
4566 4567 4568 4569

	return(error);
}

4570
/*************************************************************************
4571
Renames an InnoDB table. */
4572 4573 4574 4575 4576 4577 4578 4579 4580 4581 4582

int
ha_innobase::rename_table(
/*======================*/
				/* out: 0 or error code */
	const char*	from,	/* in: old name of the table */
	const char*	to)	/* in: new name of the table */
{
	ulint	name_len1;
	ulint	name_len2;
	int	error;
unknown's avatar
unknown committed
4583
	trx_t*	parent_trx;
4584
	trx_t*	trx;
4585 4586
	char	norm_from[1000];
	char	norm_to[1000];
4587

4588 4589
  	DBUG_ENTER("ha_innobase::rename_table");

unknown's avatar
unknown committed
4590 4591 4592
	/* Get the transaction associated with the current thd, or create one
	if not yet created */
	
unknown's avatar
unknown committed
4593
	parent_trx = check_trx_exists(current_thd);
unknown's avatar
unknown committed
4594 4595 4596 4597 4598 4599

	/* In case MySQL calls this in the middle of a SELECT query, release
	possible adaptive hash latch to avoid deadlocks of threads */

	trx_search_latch_release_if_reserved(parent_trx);	

unknown's avatar
unknown committed
4600 4601 4602 4603 4604 4605
	if (lower_case_table_names) {
		srv_lower_case_table_names = TRUE;
	} else {
		srv_lower_case_table_names = FALSE;
	}

4606
	trx = trx_allocate_for_mysql();
unknown's avatar
unknown committed
4607 4608
	trx->mysql_thd = current_thd;
	trx->mysql_query_str = &((*current_thd).query);
4609 4610 4611 4612 4613 4614

	name_len1 = strlen(from);
	name_len2 = strlen(to);

	assert(name_len1 < 1000);
	assert(name_len2 < 1000);
4615

4616 4617 4618
	normalize_table_name(norm_from, from);
	normalize_table_name(norm_to, to);

4619
  	/* Rename the table in InnoDB */
4620

4621
  	error = row_rename_table_for_mysql(norm_from, norm_to, trx);
4622

unknown's avatar
Merge  
unknown committed
4623 4624 4625
	/* Flush the log to reduce probability that the .frm files and
	the InnoDB data dictionary get out-of-sync if the user runs
	with innodb_flush_log_at_trx_commit = 0 */
unknown's avatar
unknown committed
4626

unknown's avatar
unknown committed
4627
	log_buffer_flush_to_disk();
unknown's avatar
Merge  
unknown committed
4628

4629
	/* Tell the InnoDB server that there might be work for
4630 4631 4632 4633
	utility threads: */

	srv_active_wake_master_thread();

unknown's avatar
unknown committed
4634
  	innobase_commit_low(trx);
4635 4636
  	trx_free_for_mysql(trx);

unknown's avatar
unknown committed
4637
	error = convert_error_code_to_mysql(error, NULL);
4638 4639 4640 4641 4642 4643 4644 4645 4646 4647

	DBUG_RETURN(error);
}

/*************************************************************************
Estimates the number of index records in a range. Both range ends are
converted to InnoDB tuples and the estimate is obtained from
btr_estimate_n_rows_in_range(). An estimate of 0 is reported as 1. */

ha_rows
ha_innobase::records_in_range(
/*==========================*/
						/* out: estimated number of
						rows; HA_POS_ERROR if the
						conversion buffer cannot be
						allocated */
	uint 			keynr,		/* in: index number */
        key_range		*min_key,	/* in: start key value of the
                                                   range, may also be 0 */
	key_range		*max_key)	/* in: range end key val, may
                                                   also be 0 */
{
	row_prebuilt_t* prebuilt	= (row_prebuilt_t*) innobase_prebuilt;
	KEY*		key;
	dict_index_t*	index;
	/* Size of the buffer for converting the range end key; computed
	once and used for both the allocation and the conversion call */
	ulint		buff2_len = table->s->reclength
      					+ table->s->max_key_length + 100;
	mysql_byte*	key_val_buff2 	= (mysql_byte*) my_malloc(
						  buff2_len, MYF(MY_WME));
	dtuple_t*	range_start;
	dtuple_t*	range_end;
	ib_longlong	n_rows;
	ulint		mode1;
	ulint		mode2;
	void*           heap1;
	void*           heap2;

   	DBUG_ENTER("records_in_range");

	if (key_val_buff2 == NULL) {
		/* Out of memory: do not write through a NULL buffer.
		NOTE(review): HA_POS_ERROR is presumably what callers treat
		as "no estimate available" - confirm against the optimizer. */

		DBUG_RETURN(HA_POS_ERROR);
	}

	prebuilt->trx->op_info = (char*)"estimating records in index range";

	/* In case MySQL calls this in the middle of a SELECT query, release
	possible adaptive hash latch to avoid deadlocks of threads */

	trx_search_latch_release_if_reserved(prebuilt->trx);

	active_index = keynr;

	key = table->key_info + active_index;

	index = dict_table_get_index_noninline(prebuilt->table, key->name);

	/* Each tuple is allocated from a heap of its own, which is freed
	below with dtuple_free_for_mysql() */

	range_start = dtuple_create_for_mysql(&heap1, key->key_parts);
 	dict_index_copy_types(range_start, index, key->key_parts);

	range_end = dtuple_create_for_mysql(&heap2, key->key_parts);
 	dict_index_copy_types(range_end, index, key->key_parts);

	/* The range start is converted using the key value buffer of the
	handle, the range end using the buffer allocated above. A missing
	key is passed as a NULL pointer with length 0. */

	row_sel_convert_mysql_key_to_innobase(
				range_start, (byte*) key_val_buff,
				(ulint)upd_and_key_val_buff_len,
				index,
				(byte*) (min_key ? min_key->key :
                                         (const mysql_byte*) 0),
				(ulint) (min_key ? min_key->length : 0),
				prebuilt->trx);

	row_sel_convert_mysql_key_to_innobase(
				range_end, (byte*) key_val_buff2,
				buff2_len, index,
				(byte*) (max_key ? max_key->key :
                                         (const mysql_byte*) 0),
				(ulint) (max_key ? max_key->length : 0),
				prebuilt->trx);

	mode1 = convert_search_mode_to_innobase(min_key ? min_key->flag :
                                                HA_READ_KEY_EXACT);
	mode2 = convert_search_mode_to_innobase(max_key ? max_key->flag :
                                                HA_READ_KEY_EXACT);

	n_rows = btr_estimate_n_rows_in_range(index, range_start,
						mode1, range_end, mode2);
	dtuple_free_for_mysql(heap1);
	dtuple_free_for_mysql(heap2);

    	my_free((char*) key_val_buff2, MYF(0));

	prebuilt->trx->op_info = (char*)"";

	/* The MySQL optimizer seems to believe an estimate of 0 rows is
	always accurate and may return the result 'Empty set' based on that.
	The accuracy is not guaranteed, and even if it were, for a locking
	read we should anyway perform the search to set the next-key lock.
	Add 1 to the value to make sure MySQL does not make the assumption! */

	if (n_rows == 0) {
	        n_rows = 1;
	}

	DBUG_RETURN((ha_rows) n_rows);
}

4738 4739
/*************************************************************************
Gives an UPPER BOUND to the number of rows in a table. This is used in
unknown's avatar
unknown committed
4740
filesort.cc. */
4741 4742

ha_rows
unknown's avatar
unknown committed
4743
ha_innobase::estimate_rows_upper_bound(void)
4744
/*======================================*/
4745
			/* out: upper bound of rows */
4746 4747
{
	row_prebuilt_t* prebuilt	= (row_prebuilt_t*) innobase_prebuilt;
4748 4749
	dict_index_t*	index;
	ulonglong	estimate;
4750
	ulonglong	local_data_file_length;
unknown's avatar
unknown committed
4751

unknown's avatar
unknown committed
4752
 	DBUG_ENTER("estimate_rows_upper_bound");
4753

unknown's avatar
unknown committed
4754 4755 4756 4757 4758 4759
	/* We do not know if MySQL can call this function before calling
	external_lock(). To be safe, update the thd of the current table
	handle. */

	update_thd(current_thd);

unknown's avatar
unknown committed
4760 4761 4762
	prebuilt->trx->op_info = (char*)
	                         "calculating upper bound for table rows";

unknown's avatar
unknown committed
4763 4764 4765 4766
	/* In case MySQL calls this in the middle of a SELECT query, release
	possible adaptive hash latch to avoid deadlocks of threads */

	trx_search_latch_release_if_reserved(prebuilt->trx);
4767

4768
	index = dict_table_get_first_index_noninline(prebuilt->table);
unknown's avatar
unknown committed
4769

4770
	local_data_file_length = ((ulonglong) index->stat_n_leaf_pages)
4771
    							* UNIV_PAGE_SIZE;
4772

unknown's avatar
unknown committed
4773 4774
	/* Calculate a minimum length for a clustered index record and from
	that an upper bound for the number of rows. Since we only calculate
unknown's avatar
unknown committed
4775 4776
	new statistics in row0mysql.c when a table has grown by a threshold
	factor, we must add a safety factor 2 in front of the formula below. */
unknown's avatar
unknown committed
4777

unknown's avatar
unknown committed
4778 4779
	estimate = 2 * local_data_file_length /
					 dict_index_calc_min_rec_len(index);
unknown's avatar
unknown committed
4780

unknown's avatar
unknown committed
4781 4782
	prebuilt->trx->op_info = (char*)"";

unknown's avatar
unknown committed
4783
	DBUG_RETURN((ha_rows) estimate);
4784 4785
}

4786 4787 4788 4789 4790 4791 4792 4793 4794 4795 4796 4797
/*************************************************************************
How many seeks it will take to read through the table. This is to be
comparable to the number returned by records_in_range so that we can
decide if we should scan the table or use keys. The value returned is
the stat_clustered_index_size of the table. */

double
ha_innobase::scan_time()
/*====================*/
			/* out: estimated time measured in disk seeks */
{
	row_prebuilt_t* prebuilt	= (row_prebuilt_t*) innobase_prebuilt;
	double		seeks;

	/* MySQL seems to favor table scans too much over index searches.
	Therefore a sequential read is deliberately priced like a random
	disk read: the value is not divided by 10, although that would be
	physically realistic. */

	seeks = (double) (prebuilt->table->stat_clustered_index_size);

	return(seeks);
}

unknown's avatar
unknown committed
4806 4807 4808
/**********************************************************************
Calculate the time it takes to read a set of ranges through an index
This enables us to optimise reads for clustered indexes. */
unknown's avatar
unknown committed
4809

unknown's avatar
unknown committed
4810 4811 4812 4813 4814 4815 4816
double
ha_innobase::read_time(
/*===================*/
			/* out: estimated time measured in disk seeks */
	uint    index,	/* in: key number */
	uint	ranges,	/* in: how many ranges */
	ha_rows rows)	/* in: estimated number of rows in the ranges */
unknown's avatar
unknown committed
4817
{
unknown's avatar
unknown committed
4818 4819 4820
	ha_rows total_rows;
	double  time_for_scan;
  
unknown's avatar
unknown committed
4821 4822 4823 4824
	if (index != table->s->primary_key) {
		/* Not clustered */		
	  	return(handler::read_time(index, ranges, rows));
	}
unknown's avatar
unknown committed
4825

unknown's avatar
unknown committed
4826
	if (rows <= 2) {
unknown's avatar
unknown committed
4827

unknown's avatar
unknown committed
4828 4829
		return((double) rows);
	}
unknown's avatar
unknown committed
4830 4831 4832 4833

	/* Assume that the read time is proportional to the scan time for all
	rows + at most one seek per range. */

unknown's avatar
unknown committed
4834
	time_for_scan = scan_time();
unknown's avatar
unknown committed
4835

unknown's avatar
unknown committed
4836
	if ((total_rows = estimate_rows_upper_bound()) < rows) {
unknown's avatar
unknown committed
4837

unknown's avatar
unknown committed
4838 4839
	  	return(time_for_scan);
	}
unknown's avatar
unknown committed
4840

unknown's avatar
unknown committed
4841
	return(ranges + (double) rows / (double) total_rows * time_for_scan);
unknown's avatar
unknown committed
4842 4843
}

4844 4845 4846 4847 4848 4849 4850 4851 4852 4853 4854 4855
/*************************************************************************
Copies statistics information of the table from InnoDB to various fields
of the handle object, where the MySQL interpreter reads them. The bits in
flag select which groups of fields are refreshed. */

void
ha_innobase::info(
/*==============*/
	uint flag)	/* in: what information MySQL requests */
{
	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;

	DBUG_ENTER("info");

	/* When recovery is being forced at a high level, statistics
	calculation on tables is suppressed, because it may crash the
	server if an index is badly corrupted. */

	if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE) {

		DBUG_VOID_RETURN;
	}

	/* We do not know if MySQL can call this function before calling
	external_lock(). To be safe, update the thd of the current table
	handle. */

	update_thd(current_thd);

	prebuilt->trx->op_info = (char*)"returning various info to MySQL";

	/* In case MySQL calls this in the middle of a SELECT query, release
	possible adaptive hash latch to avoid deadlocks of threads */

	trx_search_latch_release_if_reserved(prebuilt->trx);

	dict_table_t*	ib_table = prebuilt->table;

	if (flag & HA_STATUS_TIME) {
		char		path[FN_REFLEN];
		os_file_stat_t	stat_info;
		const char*	suffix;

		/* In sql_show we call with this flag: update then the
		statistics so that they are up-to-date */

		prebuilt->trx->op_info = (char*)"updating table statistics";

		dict_update_statistics(ib_table);

		prebuilt->trx->op_info = (char*)
					"returning various info to MySQL";

		/* A table with a nonzero space id is looked up through
		its .ibd file, any other table through the file that has
		the extension reg_ext. */

		suffix = (ib_table->space != 0) ? ".ibd" : reg_ext;

		my_snprintf(path, sizeof(path), "%s/%s%s",
			    mysql_data_home, ib_table->name, suffix);
		unpack_filename(path, path);

		/* Note that we do not know the access time of the table,
		nor the CHECK TABLE time, nor the UPDATE or INSERT time. */

		if (os_file_get_status(path, &stat_info)) {
			create_time = stat_info.ctime;
		}
	}

	if (flag & HA_STATUS_VARIABLE) {
		records = (ha_rows)ib_table->stat_n_rows;
		deleted = 0;
		delete_length = 0;
		check_time = 0;

		/* The index sizes are multiplied by UNIV_PAGE_SIZE to
		get lengths */

		data_file_length = ((ulonglong)
				ib_table->stat_clustered_index_size)
					* UNIV_PAGE_SIZE;
		index_file_length = ((ulonglong)
				ib_table->stat_sum_of_other_index_sizes)
					* UNIV_PAGE_SIZE;

		mean_rec_length = (records == 0)
			? 0
			: (ulong) (data_file_length / records);
	}

	if (flag & HA_STATUS_CONST) {
		dict_index_t*	index
			= dict_table_get_first_index_noninline(ib_table);

		if (prebuilt->clust_index_was_generated) {
			/* Step past the first index, which in this case
			is not matched against any MySQL key */

			index = dict_table_get_next_index_noninline(index);
		}

		/* Walk the MySQL keys and the InnoDB indexes in step */

		for (ulong key_no = 0; key_no < table->s->keys; key_no++) {

			if (index == NULL) {
				ut_print_timestamp(stderr);
				fprintf(stderr,
"  InnoDB: Error: table %s contains less indexes inside InnoDB\n"
"InnoDB: than are defined in the MySQL .frm file. Have you mixed up\n"
"InnoDB: .frm files from different installations? See section\n"
"InnoDB: 15.1 at http://www.innodb.com/ibman.html\n",
					ib_table->name);

				break;
			}

			for (ulong part = 0;
			     part < table->key_info[key_no].key_parts;
			     part++) {

				ha_rows	rec_per_key;

				if (part + 1 > index->n_uniq) {
					ut_print_timestamp(stderr);
					fprintf(stderr,
"  InnoDB: Error: index %s of %s has %lu columns unique inside InnoDB\n"
"InnoDB: but MySQL is asking statistics for %lu columns. Have you mixed up\n"
"InnoDB: .frm files from different installations? See section\n"
"InnoDB: 15.1 at http://www.innodb.com/ibman.html\n",
						index->name,
						ib_table->name,
						(unsigned long) index->n_uniq,
						part + 1);

					/* Only the rest of this key is
					skipped; the outer loop goes on */
					break;
				}

				if (index->stat_n_diff_key_vals[part + 1] == 0) {

					rec_per_key = records;
				} else {
					rec_per_key = (ha_rows)(records /
					 index->stat_n_diff_key_vals[part + 1]);
				}

				/* Since MySQL seems to favor table scans
				too much over index searches, we pretend
				index selectivity is 2 times better than
				our estimate: */

				rec_per_key /= 2;

				if (rec_per_key == 0) {
					rec_per_key = 1;
				}

				/* Clamp the value to what an ulong can
				hold before storing it */

				if (rec_per_key >= ~(ulong) 0) {
					table->key_info[key_no].rec_per_key[part]
						= ~(ulong) 0;
				} else {
					table->key_info[key_no].rec_per_key[part]
						= rec_per_key;
				}
			}

			index = dict_table_get_next_index_noninline(index);
		}
	}

	if (flag & HA_STATUS_ERRKEY) {
		ut_a(prebuilt->trx && prebuilt->trx->magic_n == TRX_MAGIC_N);

		errkey = (unsigned int) row_get_mysql_key_number_for_index(
				       (dict_index_t*)
				       trx_get_error_info(prebuilt->trx));
	}

	prebuilt->trx->op_info = (char*)"";

	DBUG_VOID_RETURN;
}

unknown's avatar
unknown committed
5015
/**************************************************************************
unknown's avatar
unknown committed
5016 5017
Updates index cardinalities of the table, based on 8 random dives into
each index tree. This does NOT calculate exact statistics on the table. */
unknown's avatar
unknown committed
5018 5019 5020 5021 5022 5023 5024 5025 5026 5027 5028 5029 5030 5031

int
ha_innobase::analyze(
/*=================*/			 
					/* out: returns always 0 (success) */
	THD*		thd,		/* in: connection thread handle */
	HA_CHECK_OPT*	check_opt)	/* in: currently ignored */
{
	/* Simply call ::info() with all the flags */
	info(HA_STATUS_TIME | HA_STATUS_CONST | HA_STATUS_VARIABLE);

	return(0);
}

unknown's avatar
unknown committed
5032
/**************************************************************************
5033 5034
This is mapped to "ALTER TABLE tablename TYPE=InnoDB", which rebuilds
the table in MySQL. */
5035

unknown's avatar
unknown committed
5036 5037 5038 5039 5040
int
ha_innobase::optimize(
/*==================*/
	THD*		thd,		/* in: connection thread handle */
	HA_CHECK_OPT*	check_opt)	/* in: currently ignored */
5041
{
5042
        return(HA_ADMIN_TRY_ALTER);
5043 5044
}

unknown's avatar
unknown committed
5045 5046 5047 5048 5049 5050 5051 5052 5053 5054 5055 5056 5057 5058 5059 5060
/***********************************************************************
Tries to verify that an InnoDB table is not corrupted. Information about
noticed corruption is printed to stderr; in case of corruption an
assertion may also fail and crash the server. */

int
ha_innobase::check(
/*===============*/
					/* out: HA_ADMIN_CORRUPT or
					HA_ADMIN_OK */
	THD* 		thd,		/* in: user thread handle */
	HA_CHECK_OPT* 	check_opt)	/* in: check options, currently
					ignored */
{
	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;

	/* The handle must carry a valid trx, and it must be the trx of
	the thread that is running now */

	ut_a(prebuilt->trx && prebuilt->trx->magic_n == TRX_MAGIC_N);
	ut_a(prebuilt->trx ==
                (trx_t*) current_thd->ha_data[innobase_hton.slot]);

	if (prebuilt->mysql_template == NULL) {
		/* Build the template; we will use a dummy template
		in index scans done in checking */

		build_template(prebuilt, NULL, table, ROW_MYSQL_WHOLE_ROW);
	}

	const ulint	err = row_check_table_for_mysql(prebuilt);

	return(err == DB_SUCCESS ? HA_ADMIN_OK : HA_ADMIN_CORRUPT);
}

5082
/*****************************************************************
unknown's avatar
Merge  
unknown committed
5083 5084 5085
Adds information about free space in the InnoDB tablespace to a table comment
which is printed out when a user calls SHOW TABLE STATUS. Adds also info on
foreign keys. */
5086 5087 5088 5089

char*
ha_innobase::update_table_comment(
/*==============================*/
unknown's avatar
Merge  
unknown committed
5090 5091 5092
				/* out: table comment + InnoDB free space +
				info on foreign keys */
        const char*	comment)/* in: table comment defined by user */
5093
{
5094 5095 5096
	uint	length			= strlen(comment);
	char*				str;
	row_prebuilt_t*	prebuilt	= (row_prebuilt_t*)innobase_prebuilt;
5097

unknown's avatar
unknown committed
5098 5099 5100 5101
	/* We do not know if MySQL can call this function before calling
	external_lock(). To be safe, update the thd of the current table
	handle. */

5102 5103 5104 5105
	if(length > 64000 - 3) {
		return((char*)comment); /* string too long */
	}

unknown's avatar
unknown committed
5106 5107
	update_thd(current_thd);

unknown's avatar
unknown committed
5108 5109
	prebuilt->trx->op_info = (char*)"returning table comment";

unknown's avatar
unknown committed
5110 5111 5112 5113
	/* In case MySQL calls this in the middle of a SELECT query, release
	possible adaptive hash latch to avoid deadlocks of threads */

	trx_search_latch_release_if_reserved(prebuilt->trx);
5114
	str = NULL;
unknown's avatar
unknown committed
5115

5116
	if (FILE* file = os_file_create_tmpfile()) {
5117
		long	flen;
unknown's avatar
Merge  
unknown committed
5118

5119 5120
		/* output the data to a temporary file */
		fprintf(file, "InnoDB free: %lu kB",
unknown's avatar
unknown committed
5121 5122
      		   (ulong) fsp_get_available_space_in_free_extents(
      					prebuilt->table->space));
5123

5124 5125
		dict_print_info_on_foreign_keys(FALSE, file,
				prebuilt->trx, prebuilt->table);
5126
		flen = ftell(file);
5127 5128 5129
		if (flen < 0) {
			flen = 0;
		} else if (length + flen + 3 > 64000) {
5130 5131
			flen = 64000 - 3 - length;
		}
5132

5133 5134
		/* allocate buffer for the full string, and
		read the contents of the temporary file */
5135

5136
		str = my_malloc(length + flen + 3, MYF(0));
5137

5138 5139 5140 5141 5142 5143 5144 5145 5146 5147 5148 5149 5150
		if (str) {
			char* pos	= str + length;
			if(length) {
				memcpy(str, comment, length);
				*pos++ = ';';
				*pos++ = ' ';
			}
			rewind(file);
			flen = fread(pos, 1, flen, file);
			pos[flen] = 0;
		}

		fclose(file);
unknown's avatar
unknown committed
5151
	}
unknown's avatar
unknown committed
5152

unknown's avatar
unknown committed
5153 5154
        prebuilt->trx->op_info = (char*)"";

5155
  	return(str ? str : (char*) comment);
5156 5157
}

unknown's avatar
unknown committed
5158 5159 5160 5161 5162 5163 5164 5165 5166 5167 5168
/***********************************************************************
Gets the foreign key create info for a table stored in InnoDB. */

char*
ha_innobase::get_foreign_key_create_info(void)
/*==========================================*/
			/* out, own: character string in the form which
			can be inserted to the CREATE TABLE statement,
			MUST be freed with ::free_foreign_key_create_info */
{
	row_prebuilt_t* prebuilt = (row_prebuilt_t*)innobase_prebuilt;
5169
	char*	str	= 0;
unknown's avatar
unknown committed
5170

unknown's avatar
unknown committed
5171
	ut_a(prebuilt != NULL);
5172

unknown's avatar
unknown committed
5173 5174 5175 5176 5177 5178
	/* We do not know if MySQL can call this function before calling
	external_lock(). To be safe, update the thd of the current table
	handle. */

	update_thd(current_thd);

5179
	if (FILE* file = os_file_create_tmpfile()) {
5180
		long	flen;
unknown's avatar
unknown committed
5181

5182
		prebuilt->trx->op_info = (char*)"getting info on foreign keys";
unknown's avatar
unknown committed
5183

5184 5185 5186
		/* In case MySQL calls this in the middle of a SELECT query,
		release possible adaptive hash latch to avoid
		deadlocks of threads */
5187

5188
		trx_search_latch_release_if_reserved(prebuilt->trx);
unknown's avatar
unknown committed
5189

5190
		/* output the data to a temporary file */
5191 5192
		dict_print_info_on_foreign_keys(TRUE, file,
				prebuilt->trx, prebuilt->table);
5193 5194 5195
		prebuilt->trx->op_info = (char*)"";

		flen = ftell(file);
5196 5197 5198
		if (flen < 0) {
			flen = 0;
		} else if(flen > 64000 - 1) {
5199 5200 5201 5202 5203 5204 5205 5206 5207 5208 5209 5210 5211 5212 5213 5214 5215
			flen = 64000 - 1;
		}

		/* allocate buffer for the string, and
		read the contents of the temporary file */

		str = my_malloc(flen + 1, MYF(0));

		if (str) {
			rewind(file);
			flen = fread(str, 1, flen, file);
			str[flen] = 0;
		}

		fclose(file);
	} else {
		/* unable to create temporary file */
unknown's avatar
unknown committed
5216
          	str = my_malloc(1, MYF(MY_ZEROFILL));
5217
	}
unknown's avatar
unknown committed
5218

unknown's avatar
Merge  
unknown committed
5219
  	return(str);
unknown's avatar
unknown committed
5220
}
unknown's avatar
unknown committed
5221

5222 5223 5224 5225 5226 5227 5228 5229 5230 5231 5232 5233

/***********************************************************************
Collects the foreign key constraints defined on this table into a list.
Each list element and each string in it is allocated through thd. The
list of the table is walked while dict_sys->mutex is held. */

int
ha_innobase::get_foreign_key_list(
/*==============================*/
						/* out: always 0 */
	THD *thd,				/* in: user thread handle */
	List<FOREIGN_KEY_INFO> *f_key_list)	/* in/out: list to which one
						element is appended for each
						foreign key */
{
  dict_foreign_t* foreign;

  DBUG_ENTER("get_foreign_key_list");
  row_prebuilt_t* prebuilt = (row_prebuilt_t*)innobase_prebuilt;
  ut_a(prebuilt != NULL);

  /* We do not know if MySQL can call this function before calling
  external_lock(). To be safe, update the thd of the current table
  handle. */
  update_thd(current_thd);
  prebuilt->trx->op_info = (char*)"getting list of foreign keys";

  /* Release a possibly reserved adaptive hash latch before the
  dictionary mutex is taken */
  trx_search_latch_release_if_reserved(prebuilt->trx);
  mutex_enter_noninline(&(dict_sys->mutex));
  foreign = UT_LIST_GET_FIRST(prebuilt->table->foreign_list);

  while (foreign != NULL)
  {
    uint i;
    FOREIGN_KEY_INFO f_key_info;
    LEX_STRING *name= 0;
    const char *tmp_buff;

    /* The constraint name is the part of the id after the first '/'.
    NOTE(review): the scans below stop only at a '/', so they rely on the
    id and the referenced table name always containing one - confirm. */
    tmp_buff= foreign->id;
    i= 0;
    while (tmp_buff[i] != '/')
      i++;
    tmp_buff+= i + 1;
    f_key_info.forein_id= make_lex_string(thd, 0,
                                          tmp_buff, strlen(tmp_buff), 1);

    /* The referenced table name is split at the first '/' into a
    database part and a table part */
    tmp_buff= foreign->referenced_table_name;
    i= 0;
    while (tmp_buff[i] != '/')
      i++;
    f_key_info.referenced_db= make_lex_string(thd, 0,
                                              tmp_buff, i, 1);
    tmp_buff+= i + 1;
    f_key_info.referenced_table= make_lex_string(thd, 0,
                                               tmp_buff, strlen(tmp_buff), 1);

    /* Copy the column name pairs. The body runs at least once, also
    when n_fields is 0, exactly as the loop did before. */
    i= 0;
    do
    {
      tmp_buff= foreign->foreign_col_names[i];
      name= make_lex_string(thd, name, tmp_buff, strlen(tmp_buff), 1);
      f_key_info.foreign_fields.push_back(name);
      tmp_buff= foreign->referenced_col_names[i];
      name= make_lex_string(thd, name, tmp_buff, strlen(tmp_buff), 1);
      f_key_info.referenced_fields.push_back(name);
    } while (++i < foreign->n_fields);

    /* Describe the constraint type. When the type equals none of the
    values tested below, an empty string is stored: tmp_buff is reset
    here so that it does not keep pointing at the last column name. */
    ulong length= 0;
    tmp_buff= "";
    if (foreign->type == DICT_FOREIGN_ON_DELETE_CASCADE)
    {
      length=17;
      tmp_buff= "ON DELETE CASCADE";
    }
    else if (foreign->type == DICT_FOREIGN_ON_DELETE_SET_NULL)
    {
      length=18;
      tmp_buff= "ON DELETE SET NULL";
    }
    else if (foreign->type == DICT_FOREIGN_ON_DELETE_NO_ACTION)
    {
      length=19;
      tmp_buff= "ON DELETE NO ACTION";
    }
    else if (foreign->type == DICT_FOREIGN_ON_UPDATE_CASCADE)
    {
      length=17;
      tmp_buff= "ON UPDATE CASCADE";
    }
    else if (foreign->type == DICT_FOREIGN_ON_UPDATE_SET_NULL)
    {
      length=18;
      tmp_buff= "ON UPDATE SET NULL";
    }
    else if (foreign->type == DICT_FOREIGN_ON_UPDATE_NO_ACTION)
    {
      length=19;
      tmp_buff= "ON UPDATE NO ACTION";
    }

    /* Pass 0 like the other calls that ask for a new LEX_STRING:
    f_key_info.constraint_method has not been given a value yet, so it
    must not be read here */
    f_key_info.constraint_method= make_lex_string(thd, 0,
                                                  tmp_buff, length, 1);

    /* Append a copy of the element to the list of the caller */
    FOREIGN_KEY_INFO *pf_key_info= ((FOREIGN_KEY_INFO *)
                                    thd->memdup((gptr) &f_key_info,
                                                sizeof(FOREIGN_KEY_INFO)));
    f_key_list->push_back(pf_key_info);
    foreign = UT_LIST_GET_NEXT(foreign_list, foreign);
  }
  mutex_exit_noninline(&(dict_sys->mutex));
  prebuilt->trx->op_info = (char*)"";
  DBUG_RETURN(0);
}

unknown's avatar
unknown committed
5319 5320 5321 5322 5323 5324 5325 5326 5327 5328 5329 5330 5331 5332 5333 5334 5335 5336 5337 5338
/***********************************************************************
Tells whether some foreign key references this table. The MySQL manual
states that a REPLACE is either equivalent to an INSERT, or DELETE(s) +
INSERT. Only a delete is then allowed internally to resolve a duplicate
key conflict in REPLACE, not an update. */

uint
ha_innobase::referenced_by_foreign_key(void)
/*========================================*/
			/* out: > 0 if referenced by a FOREIGN KEY */
{
	dict_table_t*	ib_table;

	ib_table = ((row_prebuilt_t*) innobase_prebuilt)->table;

	return(dict_table_referenced_by_foreign_key(ib_table) ? 1 : 0);
}
unknown's avatar
unknown committed
5339 5340 5341 5342 5343 5344 5345 5346 5347 5348 5349

/***********************************************************************
Frees the foreign key create info for a table stored in InnoDB, if it is
non-NULL. */

void
ha_innobase::free_foreign_key_create_info(
/*======================================*/
	char*	str)	/* in, own: create info string to free  */
{
	if (str) {
5350
		my_free(str, MYF(0));
unknown's avatar
unknown committed
5351
	}
5352 5353
}

unknown's avatar
unknown committed
5354 5355 5356 5357 5358 5359 5360 5361
/***********************************************************************
Tells something additional to the handler about how to do things. */

int
ha_innobase::extra(
/*===============*/
			   /* out: 0 or error number */
	enum ha_extra_function operation)
unknown's avatar
unknown committed
5362 5363
                           /* in: HA_EXTRA_RETRIEVE_ALL_COLS or some
			   other flag */
unknown's avatar
unknown committed
5364 5365 5366 5367 5368 5369 5370 5371
{
	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;

	/* Warning: since it is not sure that MySQL calls external_lock
	before calling this function, the trx field in prebuilt can be
	obsolete! */

	switch (operation) {
unknown's avatar
unknown committed
5372 5373 5374 5375 5376 5377 5378 5379 5380
                case HA_EXTRA_FLUSH:
                        if (prebuilt->blob_heap) {
                                row_mysql_prebuilt_free_blob_heap(prebuilt);
                        }
                        break;
                case HA_EXTRA_RESET:
                        if (prebuilt->blob_heap) {
                                row_mysql_prebuilt_free_blob_heap(prebuilt);
                        }
5381
                        prebuilt->keep_other_fields_on_keyread = 0;
unknown's avatar
unknown committed
5382 5383 5384
                        prebuilt->read_just_key = 0;
                        break;
  		case HA_EXTRA_RESET_STATE:
5385
	        	prebuilt->keep_other_fields_on_keyread = 0;
unknown's avatar
unknown committed
5386
	        	prebuilt->read_just_key = 0;
unknown's avatar
unknown committed
5387
    	        	break;
unknown's avatar
unknown committed
5388 5389 5390
		case HA_EXTRA_NO_KEYREAD:
    			prebuilt->read_just_key = 0;
    			break;
unknown's avatar
unknown committed
5391
	        case HA_EXTRA_RETRIEVE_ALL_COLS:
unknown's avatar
unknown committed
5392 5393 5394 5395 5396 5397 5398 5399
			prebuilt->hint_need_to_fetch_extra_cols
					= ROW_RETRIEVE_ALL_COLS;
			break;
	        case HA_EXTRA_RETRIEVE_PRIMARY_KEY:
			if (prebuilt->hint_need_to_fetch_extra_cols == 0) {
				prebuilt->hint_need_to_fetch_extra_cols
					= ROW_RETRIEVE_PRIMARY_KEY;
			}
unknown's avatar
unknown committed
5400 5401 5402 5403
			break;
	        case HA_EXTRA_KEYREAD:
	        	prebuilt->read_just_key = 1;
	        	break;
5404 5405 5406
		case HA_EXTRA_KEYREAD_PRESERVE_FIELDS:
			prebuilt->keep_other_fields_on_keyread = 1;
			break;
unknown's avatar
unknown committed
5407 5408 5409 5410 5411 5412 5413
		default:/* Do nothing */
			;
	}

	return(0);
}

unknown's avatar
unknown committed
5414
/**********************************************************************
unknown's avatar
unknown committed
5415 5416 5417 5418 5419
MySQL calls this function at the start of each SQL statement inside LOCK
TABLES. Inside LOCK TABLES the ::external_lock method does not work to
mark SQL statement borders. Note also a special case: if a temporary table
is created inside LOCK TABLES, MySQL has not called external_lock() at all
on that table. */
unknown's avatar
unknown committed
5420 5421

int
unknown's avatar
unknown committed
5422 5423
ha_innobase::start_stmt(
/*====================*/
unknown's avatar
unknown committed
5424 5425 5426 5427 5428 5429 5430 5431 5432 5433
	              /* out: 0 or error code */
	THD*    thd)  /* in: handle to the user thread */
{
	row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt;
	trx_t*		trx;

	update_thd(thd);

	trx = prebuilt->trx;

unknown's avatar
unknown committed
5434 5435 5436 5437 5438 5439 5440
	/* Here we release the search latch and the InnoDB thread FIFO ticket
	if they were reserved. They should have been released already at the
	end of the previous statement, but because inside LOCK TABLES the
	lock count method does not work to mark the end of a SELECT statement,
	that may not be the case. We MUST release the search latch before an
	INSERT, for example. */

unknown's avatar
unknown committed
5441 5442
	innobase_release_stat_resources(trx);

unknown's avatar
unknown committed
5443 5444 5445 5446 5447 5448 5449 5450
	if (trx->isolation_level <= TRX_ISO_READ_COMMITTED
	    						&& trx->read_view) {
	    	/* At low transaction isolation levels we let
		each consistent read set its own snapshot */

	    	read_view_close_for_mysql(trx);
	}

unknown's avatar
unknown committed
5451 5452
	auto_inc_counter_for_this_stat = 0;
	prebuilt->sql_stat_start = TRUE;
unknown's avatar
unknown committed
5453
	prebuilt->hint_need_to_fetch_extra_cols = 0;
unknown's avatar
unknown committed
5454
	prebuilt->read_just_key = 0;
5455
        prebuilt->keep_other_fields_on_keyread = FALSE;
unknown's avatar
unknown committed
5456

5457
	if (!prebuilt->mysql_has_locked) {
unknown's avatar
unknown committed
5458 5459 5460 5461 5462 5463
	        /* This handle is for a temporary table created inside
	        this same LOCK TABLES; since MySQL does NOT call external_lock
	        in this case, we must use x-row locks inside InnoDB to be
	        prepared for an update of a row */
	  
	        prebuilt->select_lock_type = LOCK_X;
unknown's avatar
unknown committed
5464 5465 5466 5467
	} else {
		if (trx->isolation_level != TRX_ISO_SERIALIZABLE
		    && thd->lex->sql_command == SQLCOM_SELECT
		    && thd->lex->lock_option == TL_READ) {
unknown's avatar
unknown committed
5468
	
unknown's avatar
unknown committed
5469 5470 5471 5472 5473 5474 5475 5476 5477
			/* For other than temporary tables, we obtain
			no lock for consistent read (plain SELECT). */

			prebuilt->select_lock_type = LOCK_NONE;
		} else {
			/* Not a consistent read: restore the
			select_lock_type value. The value of
			stored_select_lock_type was decided in:
			1) ::store_lock(),
5478 5479 5480
			2) ::external_lock(), 
			3) ::init_table_handle_for_HANDLER(), and 
			4) :.transactional_table_lock(). */
unknown's avatar
unknown committed
5481 5482 5483 5484 5485 5486 5487 5488 5489 5490 5491 5492 5493 5494 5495 5496 5497 5498 5499

			prebuilt->select_lock_type =
				prebuilt->stored_select_lock_type;
		}

		if (prebuilt->stored_select_lock_type != LOCK_S
		    && prebuilt->stored_select_lock_type != LOCK_X) {
			fprintf(stderr,
"InnoDB: Error: stored_select_lock_type is %lu inside ::start_stmt()!\n",
			prebuilt->stored_select_lock_type);

			/* Set the value to LOCK_X: this is just fault
			tolerance, we do not know what the correct value
			should be! */

			prebuilt->select_lock_type = LOCK_X;
		}
	}

unknown's avatar
unknown committed
5500
	/* Set the MySQL flag to mark that there is an active transaction */
5501 5502 5503 5504 5505
        if (trx->active_trans == 0) {

                register_trans(thd);
                trx->active_trans = 1;
        }
unknown's avatar
unknown committed
5506 5507

	return(0);
unknown's avatar
unknown committed
5508 5509
}

unknown's avatar
unknown committed
5510 5511 5512 5513 5514 5515 5516 5517 5518 5519 5520
/**********************************************************************
Translates a MySQL transaction isolation level code to the corresponding
InnoDB isolation level code. An unknown code fails an assertion. */
inline
ulint
innobase_map_isolation_level(
/*=========================*/
					/* out: InnoDB isolation level */
	enum_tx_isolation	iso)	/* in: MySQL isolation level code */
{
	ulint	level	= 0;

	switch (iso) {
		case ISO_READ_UNCOMMITTED:
			level = TRX_ISO_READ_UNCOMMITTED;
			break;
		case ISO_READ_COMMITTED:
			level = TRX_ISO_READ_COMMITTED;
			break;
		case ISO_REPEATABLE_READ:
			level = TRX_ISO_REPEATABLE_READ;
			break;
		case ISO_SERIALIZABLE:
			level = TRX_ISO_SERIALIZABLE;
			break;
		default:
			/* Not a known MySQL isolation level; 0 is
			returned if the assertion does not stop us */
			ut_a(0);
	}

	return(level);
}
	
unknown's avatar
unknown committed
5528 5529
/**********************************************************************
As MySQL will execute an external lock for every new table it uses when it
unknown's avatar
unknown committed
5530 5531 5532
starts to process an SQL statement (an exception is when MySQL calls
start_stmt for the handle) we can use this function to store the pointer to
the THD in the handle. We will also use this function to communicate
unknown's avatar
unknown committed
5533 5534 5535 5536 5537 5538 5539
to InnoDB that a new SQL statement has started and that we must store a
savepoint to our transaction handle, so that we are able to roll back
the SQL statement in case of an error. */

int
ha_innobase::external_lock(
/*=======================*/
5540
			        /* out: 0 */
unknown's avatar
unknown committed
5541 5542 5543 5544 5545 5546 5547
	THD*	thd,		/* in: handle to the user thread */
	int 	lock_type)	/* in: lock type */
{
	row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt;
	trx_t*		trx;

  	DBUG_ENTER("ha_innobase::external_lock");
unknown's avatar
unknown committed
5548
	DBUG_PRINT("enter",("lock_type: %d", lock_type));
unknown's avatar
unknown committed
5549 5550 5551 5552 5553 5554

	update_thd(thd);

	trx = prebuilt->trx;

	prebuilt->sql_stat_start = TRUE;
unknown's avatar
unknown committed
5555
	prebuilt->hint_need_to_fetch_extra_cols = 0;
unknown's avatar
unknown committed
5556 5557

	prebuilt->read_just_key = 0;
5558
	prebuilt->keep_other_fields_on_keyread = FALSE;
unknown's avatar
unknown committed
5559 5560 5561 5562 5563 5564

	if (lock_type == F_WRLCK) {

		/* If this is a SELECT, then it is in UPDATE TABLE ...
		or SELECT ... FOR UPDATE */
		prebuilt->select_lock_type = LOCK_X;
unknown's avatar
unknown committed
5565
		prebuilt->stored_select_lock_type = LOCK_X;
unknown's avatar
unknown committed
5566 5567 5568
	}

	if (lock_type != F_UNLCK) {
unknown's avatar
unknown committed
5569
		/* MySQL is setting a new table lock */
unknown's avatar
unknown committed
5570

unknown's avatar
unknown committed
5571 5572
		/* Set the MySQL flag to mark that there is an active
		transaction */
5573 5574 5575 5576 5577
                if (trx->active_trans == 0) {

                        register_trans(thd);
                        trx->active_trans = 1;
                }
unknown's avatar
unknown committed
5578

unknown's avatar
unknown committed
5579
		trx->n_mysql_tables_in_use++;
5580
		prebuilt->mysql_has_locked = TRUE;
unknown's avatar
unknown committed
5581

unknown's avatar
unknown committed
5582 5583
		if (trx->n_mysql_tables_in_use == 1) {
		        trx->isolation_level = innobase_map_isolation_level(
unknown's avatar
unknown committed
5584 5585
						(enum_tx_isolation)
						thd->variables.tx_isolation);
unknown's avatar
unknown committed
5586
		}
unknown's avatar
unknown committed
5587 5588

		if (trx->isolation_level == TRX_ISO_SERIALIZABLE
unknown's avatar
unknown committed
5589 5590
		    && prebuilt->select_lock_type == LOCK_NONE
		    && (thd->options
unknown's avatar
unknown committed
5591
				& (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) {
unknown's avatar
unknown committed
5592

unknown's avatar
unknown committed
5593 5594
			/* To get serializable execution, we let InnoDB
			conceptually add 'LOCK IN SHARE MODE' to all SELECTs
unknown's avatar
unknown committed
5595 5596 5597 5598 5599
			which otherwise would have been consistent reads. An
			exception is consistent reads in the AUTOCOMMIT=1 mode:
			we know that they are read-only transactions, and they
			can be serialized also if performed as consistent
			reads. */
unknown's avatar
unknown committed
5600 5601 5602 5603

			prebuilt->select_lock_type = LOCK_S;
		}

unknown's avatar
unknown committed
5604 5605 5606 5607 5608 5609
		/* Starting from 4.1.9, no InnoDB table lock is taken in LOCK
		TABLES if AUTOCOMMIT=1. It does not make much sense to acquire
		an InnoDB table lock if it is released immediately at the end
		of LOCK TABLES, and InnoDB's table locks in that case cause
		VERY easily deadlocks. */

unknown's avatar
unknown committed
5610
		if (prebuilt->select_lock_type != LOCK_NONE) {
unknown's avatar
unknown committed
5611

5612
			if (thd->in_lock_tables &&
unknown's avatar
unknown committed
5613 5614
			    thd->variables.innodb_table_locks &&
			    (thd->options & OPTION_NOT_AUTOCOMMIT)) {
unknown's avatar
unknown committed
5615

5616
				ulint	error;
5617 5618
				error = row_lock_table_for_mysql(prebuilt,
							NULL, LOCK_TABLE_EXP);
5619 5620 5621 5622 5623 5624 5625

				if (error != DB_SUCCESS) {
					error = convert_error_code_to_mysql(
						error, user_thd);
					DBUG_RETURN(error);
				}
			}
unknown's avatar
unknown committed
5626 5627 5628 5629

		  	trx->mysql_n_tables_locked++;
		}

5630
		DBUG_RETURN(0);
unknown's avatar
unknown committed
5631
	}
unknown's avatar
unknown committed
5632

unknown's avatar
unknown committed
5633
	/* MySQL is releasing a table lock */
unknown's avatar
unknown committed
5634

unknown's avatar
unknown committed
5635 5636 5637
	trx->n_mysql_tables_in_use--;
	prebuilt->mysql_has_locked = FALSE;
	auto_inc_counter_for_this_stat = 0;
unknown's avatar
unknown committed
5638 5639
	if (trx->n_lock_table_exp) {
		row_unlock_tables_for_mysql(trx);
5640
	}
5641

unknown's avatar
unknown committed
5642 5643
	/* If the MySQL lock count drops to zero we know that the current SQL
	statement has ended */
unknown's avatar
unknown committed
5644

unknown's avatar
unknown committed
5645
	if (trx->n_mysql_tables_in_use == 0) {
unknown's avatar
unknown committed
5646

unknown's avatar
unknown committed
5647 5648 5649
	        trx->mysql_n_tables_locked = 0;
		prebuilt->used_in_HANDLER = FALSE;
			
unknown's avatar
unknown committed
5650 5651 5652 5653
		/* Release a possible FIFO ticket and search latch. Since we
		may reserve the kernel mutex, we have to release the search
		system latch first to obey the latching order. */

unknown's avatar
unknown committed
5654
		innobase_release_stat_resources(trx);
unknown's avatar
unknown committed
5655

unknown's avatar
unknown committed
5656
		if (!(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) {
5657 5658
                        if (trx->active_trans != 0) {
                                innobase_commit(thd, TRUE);
unknown's avatar
unknown committed
5659 5660
			}
		} else {
unknown's avatar
unknown committed
5661 5662 5663
			if (trx->isolation_level <= TRX_ISO_READ_COMMITTED
	    						&& trx->read_view) {

unknown's avatar
unknown committed
5664
				/* At low transaction isolation levels we let
unknown's avatar
unknown committed
5665 5666
				each consistent read set its own snapshot */

unknown's avatar
unknown committed
5667
				read_view_close_for_mysql(trx);
unknown's avatar
unknown committed
5668
			}
unknown's avatar
unknown committed
5669 5670 5671
		}
	}

5672
	DBUG_RETURN(0);
unknown's avatar
unknown committed
5673 5674
}

5675 5676 5677 5678 5679 5680 5681 5682 5683 5684 5685 5686 5687 5688 5689 5690 5691 5692 5693 5694 5695 5696 5697 5698 5699 5700 5701 5702 5703
/**********************************************************************
With this function MySQL requests a transactional lock on a table when
the user has issued the query LOCK TABLES..WHERE ENGINE = InnoDB. */

int
ha_innobase::transactional_table_lock(
/*==================================*/
			        /* out: 0 on success; HA_ERR_CRASHED if
				the .ibd file is missing or the lock type
				is not F_WRLCK/F_RDLCK; otherwise the
				MySQL error code converted from the InnoDB
				table lock error */
	THD*	thd,		/* in: handle to the user thread */
	int 	lock_type)	/* in: lock type: F_WRLCK or F_RDLCK */
{
	row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt;
	trx_t*		trx;

  	DBUG_ENTER("ha_innobase::transactional_table_lock");
	DBUG_PRINT("enter",("lock_type: %d", lock_type));

	/* We do not know if MySQL can call this function before calling
	external_lock(). To be safe, update the thd of the current table
	handle. */

	update_thd(thd);

 	if (prebuilt->table->ibd_file_missing && !current_thd->tablespace_op) {
	        ut_print_timestamp(stderr);
	        fprintf(stderr, "  InnoDB error:\n"
"MySQL is trying to use a table handle but the .ibd file for\n"
"table %s does not exist.\n"
"Have you deleted the .ibd file from the database directory under\n"
"the MySQL datadir?\n"
"Look from section 15.1 of http://www.innodb.com/ibman.html\n"
"how you can resolve the problem.\n",
				prebuilt->table->name);
		DBUG_RETURN(HA_ERR_CRASHED);
	}

	trx = prebuilt->trx;

	/* Reset the per-statement hints stored in the prebuilt struct */

	prebuilt->sql_stat_start = TRUE;
	prebuilt->hint_need_to_fetch_extra_cols = 0;

	prebuilt->read_just_key = 0;
	prebuilt->keep_other_fields_on_keyread = FALSE;

	if (lock_type == F_WRLCK) {
		prebuilt->select_lock_type = LOCK_X;
		prebuilt->stored_select_lock_type = LOCK_X;
	} else if (lock_type == F_RDLCK) {
		prebuilt->select_lock_type = LOCK_S;
		prebuilt->stored_select_lock_type = LOCK_S;
	} else {
		/* Any other value, including F_UNLCK, is rejected here */
	        ut_print_timestamp(stderr);
	        fprintf(stderr, "  InnoDB error:\n"
"MySQL is trying to set transactional table lock with corrupted lock type\n"
"to table %s, lock type %d does not exist.\n",
				prebuilt->table->name, lock_type);
		DBUG_RETURN(HA_ERR_CRASHED);
	}

	/* MySQL is setting a new transactional table lock */

	/* Set the MySQL flag to mark that there is an active transaction */
        if (trx->active_trans == 0) {

                register_trans(thd);
                trx->active_trans = 1;
        }

	if (thd->in_lock_tables && thd->variables.innodb_table_locks) {
		ulint	error;

		error = row_lock_table_for_mysql(prebuilt, NULL,
						LOCK_TABLE_TRANSACTIONAL);

		if (error != DB_SUCCESS) {
			/* Keep the InnoDB code and the MySQL code in
			separate variables of their own types */
			int	mysql_error = convert_error_code_to_mysql(
							error, user_thd);
			DBUG_RETURN(mysql_error);
		}

		if (thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {

			/* Store the current undo_no of the transaction
			so that we know where to roll back if we have
			to roll back the next SQL statement */

			trx_mark_sql_stat_end(trx);
		}
	}

	DBUG_RETURN(0);
}

5767 5768 5769 5770 5771 5772 5773 5774 5775 5776
/****************************************************************************
Here we export InnoDB status variables to MySQL. This is a thin wrapper:
all the work is delegated to srv_export_innodb_status(). */

void
innodb_export_status(void)
/*======================*/
{
  /* Takes no arguments and returns nothing; see the callee for details */
  srv_export_innodb_status();
}

unknown's avatar
unknown committed
5777
/****************************************************************************
unknown's avatar
unknown committed
5778
Implements the SHOW INNODB STATUS command. Sends the output of the InnoDB
unknown's avatar
unknown committed
5779 5780
Monitor to the client. */

unknown's avatar
unknown committed
5781
bool
unknown's avatar
unknown committed
5782 5783 5784 5785
innodb_show_status(
/*===============*/
	THD*	thd)	/* in: the MySQL query thread of the caller */
{
unknown's avatar
unknown committed
5786
        Protocol        *protocol= thd->protocol;
unknown's avatar
unknown committed
5787
	trx_t*		trx;
unknown's avatar
unknown committed
5788 5789
	long		flen;
	char*		str;
unknown's avatar
unknown committed
5790

unknown's avatar
unknown committed
5791 5792
        DBUG_ENTER("innodb_show_status");

5793
        if (have_innodb != SHOW_OPTION_YES) {
unknown's avatar
unknown committed
5794 5795 5796
                my_message(ER_NOT_SUPPORTED_YET,
          "Cannot call SHOW INNODB STATUS because skip-innodb is defined",
                           MYF(0));
unknown's avatar
unknown committed
5797
                DBUG_RETURN(TRUE);
unknown's avatar
unknown committed
5798
        }
unknown's avatar
unknown committed
5799

unknown's avatar
unknown committed
5800 5801 5802 5803
	trx = check_trx_exists(thd);

	innobase_release_stat_resources(trx);

5804
	/* We let the InnoDB Monitor to output at most 64000 bytes of text. */
5805

5806
	mutex_enter_noninline(&srv_monitor_file_mutex);
5807
	rewind(srv_monitor_file);
unknown's avatar
unknown committed
5808

5809 5810
	srv_printf_innodb_monitor(srv_monitor_file);
	flen = ftell(srv_monitor_file);
5811
	os_file_set_eof(srv_monitor_file);
unknown's avatar
unknown committed
5812

5813 5814 5815
	if (flen < 0) {
		flen = 0;
	} else if (flen > 64000 - 1) {
5816 5817
		flen = 64000 - 1;
	}
unknown's avatar
unknown committed
5818

5819 5820
	/* allocate buffer for the string, and
	read the contents of the temporary file */
unknown's avatar
unknown committed
5821

unknown's avatar
unknown committed
5822 5823 5824 5825
	if (!(str = my_malloc(flen + 1, MYF(0)))) {
        	mutex_exit_noninline(&srv_monitor_file_mutex);

        	DBUG_RETURN(TRUE);
unknown's avatar
unknown committed
5826
        }
unknown's avatar
unknown committed
5827

unknown's avatar
unknown committed
5828 5829
	rewind(srv_monitor_file);
	flen = fread(str, 1, flen, srv_monitor_file);
unknown's avatar
unknown committed
5830

5831
	mutex_exit_noninline(&srv_monitor_file_mutex);
5832

unknown's avatar
unknown committed
5833 5834
	List<Item> field_list;

5835
	field_list.push_back(new Item_empty_string("Status", flen));
unknown's avatar
unknown committed
5836

5837 5838
	if (protocol->send_fields(&field_list, Protocol::SEND_NUM_ROWS |
                                               Protocol::SEND_EOF)) {
5839
		my_free(str, MYF(0));
unknown's avatar
unknown committed
5840

unknown's avatar
unknown committed
5841
		DBUG_RETURN(TRUE);
unknown's avatar
unknown committed
5842 5843
	}

unknown's avatar
unknown committed
5844 5845 5846
        protocol->prepare_for_resend();
        protocol->store(str, flen, system_charset_info);
        my_free(str, MYF(0));
unknown's avatar
unknown committed
5847

unknown's avatar
unknown committed
5848
        if (protocol->write()) {
unknown's avatar
unknown committed
5849

unknown's avatar
unknown committed
5850 5851
        	DBUG_RETURN(TRUE);
	}
unknown's avatar
unknown committed
5852
	send_eof(thd);
unknown's avatar
unknown committed
5853

unknown's avatar
unknown committed
5854
  	DBUG_RETURN(FALSE);
unknown's avatar
unknown committed
5855 5856
}

unknown's avatar
unknown committed
5857 5858 5859 5860 5861 5862 5863 5864 5865 5866 5867 5868 5869 5870 5871 5872 5873 5874 5875 5876 5877 5878 5879 5880 5881 5882 5883 5884 5885 5886 5887 5888
/****************************************************************************
Implements the SHOW MUTEX STATUS command. Sends one row for every mutex in
mutex_list whose mutex_type is not 1 and which has been used at least once,
followed by one summary row "rw_lock_mutexes" that accumulates the counters
of all the mutexes whose mutex_type is 1. */

bool
innodb_mutex_show_status(
/*=====================*/
              /* out: FALSE on success, TRUE on failure */
  THD*  thd)  /* in: the MySQL query thread of the caller */
{
  Protocol        *protocol= thd->protocol;
  List<Item> field_list;
  mutex_t*  mutex;
  ulint   rw_lock_count= 0;
  ulint   rw_lock_count_spin_loop= 0;
  ulint   rw_lock_count_spin_rounds= 0;
  ulint   rw_lock_count_os_wait= 0;
  ulint   rw_lock_count_os_yield= 0;
  ulonglong rw_lock_wait_time= 0;
  bool    write_failed= FALSE;
  DBUG_ENTER("innodb_mutex_show_status");

  field_list.push_back(new Item_empty_string("Mutex", FN_REFLEN));
  field_list.push_back(new Item_empty_string("Module", FN_REFLEN));
  field_list.push_back(new Item_uint("Count", 21));
  field_list.push_back(new Item_uint("Spin_waits", 21));
  field_list.push_back(new Item_uint("Spin_rounds", 21));
  field_list.push_back(new Item_uint("OS_waits", 21));
  field_list.push_back(new Item_uint("OS_yields", 21));
  field_list.push_back(new Item_uint("OS_waits_time", 21));

  if (protocol->send_fields(&field_list,
                            Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF))
    DBUG_RETURN(TRUE);

#ifdef MUTEX_PROTECT_TO_BE_ADDED_LATER
  mutex_enter(&mutex_list_mutex);
#endif

  for (mutex= UT_LIST_GET_FIRST(mutex_list);
       mutex != NULL;
       mutex= UT_LIST_GET_NEXT(list, mutex))
  {
    if (mutex->mutex_type == 1)
    {
      /* Not reported individually: only accumulated for the summary row */
      rw_lock_count += mutex->count_using;
      rw_lock_count_spin_loop += mutex->count_spin_loop;
      rw_lock_count_spin_rounds += mutex->count_spin_rounds;
      rw_lock_count_os_wait += mutex->count_os_wait;
      rw_lock_count_os_yield += mutex->count_os_yield;
      rw_lock_wait_time += mutex->lspent_time;
      continue;
    }

    if (mutex->count_using > 0)
    {
      protocol->prepare_for_resend();
      protocol->store(mutex->cmutex_name, system_charset_info);
      protocol->store(mutex->cfile_name, system_charset_info);
      protocol->store((ulonglong)mutex->count_using);
      protocol->store((ulonglong)mutex->count_spin_loop);
      protocol->store((ulonglong)mutex->count_spin_rounds);
      protocol->store((ulonglong)mutex->count_os_wait);
      protocol->store((ulonglong)mutex->count_os_yield);
      protocol->store((ulonglong)mutex->lspent_time/1000);

      if (protocol->write())
      {
        write_failed= TRUE;
        break;
      }
    }
  }

  if (!write_failed)
  {
    /* The summary row for the mutexes that were only accumulated above */
    protocol->prepare_for_resend();
    protocol->store("rw_lock_mutexes", system_charset_info);
    protocol->store("", system_charset_info);
    protocol->store((ulonglong)rw_lock_count);
    protocol->store((ulonglong)rw_lock_count_spin_loop);
    protocol->store((ulonglong)rw_lock_count_spin_rounds);
    protocol->store((ulonglong)rw_lock_count_os_wait);
    protocol->store((ulonglong)rw_lock_count_os_yield);
    protocol->store((ulonglong)rw_lock_wait_time/1000);

    if (protocol->write())
      write_failed= TRUE;
  }

  /* Single release point: every path that entered the list mutex leaves
  it here, including the failure of the final write. */
#ifdef MUTEX_PROTECT_TO_BE_ADDED_LATER
  mutex_exit(&mutex_list_mutex);
#endif

  if (write_failed)
    DBUG_RETURN(TRUE);

  send_eof(thd);
  DBUG_RETURN(FALSE);
}

5955 5956 5957 5958 5959
/****************************************************************************
 Handling the shared INNOBASE_SHARE structure that is needed to provide table
 locking.
****************************************************************************/

5960
/* Key extraction function for INNOBASE_SHARE objects: the key is the table
name stored in the share. The key length is returned in *length and a
pointer to the key bytes is the return value. The third argument is
unused. */
static mysql_byte* innobase_get_key(INNOBASE_SHARE *share,uint *length,
			      my_bool not_used __attribute__((unused)))
{
  mysql_byte *key= (mysql_byte*) share->table_name;

  *length= share->table_name_length;
  return key;
}

/* Looks up the INNOBASE_SHARE for table_name in innobase_open_tables,
creating and inserting a new one if none exists yet, and increments its
use count. All of this is done while holding innobase_share_mutex.
Returns the share, or 0 if memory allocation or the hash insert fails. */
static INNOBASE_SHARE *get_share(const char *table_name)
{
  INNOBASE_SHARE *share;
  uint length;

  pthread_mutex_lock(&innobase_share_mutex);

  length= (uint) strlen(table_name);
  share= (INNOBASE_SHARE*) hash_search(&innobase_open_tables,
                                       (mysql_byte*) table_name,
                                       length);
  if (!share)
  {
    /* The table name is stored right after the struct, in the same
    allocation; MY_ZEROFILL makes use_count start from 0. */
    share= (INNOBASE_SHARE *) my_malloc(sizeof(*share)+length+1,
                                        MYF(MY_WME | MY_ZEROFILL));
    if (!share)
    {
      /* Out of memory: we must not fall through to share->use_count++ */
      pthread_mutex_unlock(&innobase_share_mutex);
      return 0;
    }

    share->table_name_length= length;
    share->table_name= (char*) (share+1);
    strmov(share->table_name, table_name);

    if (my_hash_insert(&innobase_open_tables, (mysql_byte*) share))
    {
      pthread_mutex_unlock(&innobase_share_mutex);
      my_free((gptr) share, MYF(0));
      return 0;
    }

    thr_lock_init(&share->lock);
    pthread_mutex_init(&share->mutex, MY_MUTEX_INIT_FAST);
  }

  share->use_count++;
  pthread_mutex_unlock(&innobase_share_mutex);
  return share;
}

/* Decrements the use count of the share while holding
innobase_share_mutex. When the count reaches zero, the share is removed
from innobase_open_tables, its lock and mutex are destroyed, and its
memory is freed. */
static void free_share(INNOBASE_SHARE *share)
{
  pthread_mutex_lock(&innobase_share_mutex);

  share->use_count--;

  if (share->use_count == 0)
  {
    hash_delete(&innobase_open_tables, (mysql_byte*) share);
    thr_lock_delete(&share->lock);
    pthread_mutex_destroy(&share->mutex);
    my_free((gptr) share, MYF(0));
  }

  pthread_mutex_unlock(&innobase_share_mutex);
}
6009 6010

/*********************************************************************
unknown's avatar
unknown committed
6011
Converts a MySQL table lock stored in the 'lock' field of the handle to
unknown's avatar
unknown committed
6012 6013 6014 6015 6016 6017
a proper type before storing pointer to the lock into an array of pointers.
MySQL also calls this if it wants to reset some table locks to a not-locked
state during the processing of an SQL query. An example is that during a
SELECT the read lock is released early on the 'const' tables where we only
fetch one row. MySQL does not call this when it releases all locks at the
end of an SQL statement. */
6018 6019 6020 6021 6022 6023 6024 6025 6026 6027 6028 6029 6030 6031 6032 6033 6034

THR_LOCK_DATA**
ha_innobase::store_lock(
/*====================*/
						/* out: pointer to the next
						element in the 'to' array */
	THD*			thd,		/* in: user thread handle */
	THR_LOCK_DATA**		to,		/* in: pointer to an array
						of pointers to lock structs;
						pointer to the 'lock' field
						of current handle is stored
						next to this array */
	enum thr_lock_type 	lock_type)	/* in: lock type to store in
						'lock' */
{
	row_prebuilt_t* prebuilt	= (row_prebuilt_t*) innobase_prebuilt;

6035
	if ((lock_type == TL_READ && thd->in_lock_tables) ||           
unknown's avatar
unknown committed
6036 6037
	    (lock_type == TL_READ_HIGH_PRIORITY && thd->in_lock_tables) ||
	    lock_type == TL_READ_WITH_SHARED_LOCKS ||
unknown's avatar
unknown committed
6038 6039 6040
	    lock_type == TL_READ_NO_INSERT ||
	    thd->lex->sql_command != SQLCOM_SELECT) {

unknown's avatar
unknown committed
6041 6042 6043 6044 6045
		/* The OR cases above are in this order:
		1) MySQL is doing LOCK TABLES ... READ LOCAL, or
		2) (we do not know when TL_READ_HIGH_PRIORITY is used), or
		3) this is a SELECT ... IN SHARE MODE, or
		4) we are doing a complex SQL statement like
unknown's avatar
unknown committed
6046
		INSERT INTO ... SELECT ... and the logical logging (MySQL
unknown's avatar
unknown committed
6047
		binlog) requires the use of a locking read, or
unknown's avatar
unknown committed
6048 6049 6050
		MySQL is doing LOCK TABLES ... READ.
		5) we let InnoDB do locking reads for all SQL statements that
		are not simple SELECTs; note that select_lock_type in this
unknown's avatar
unknown committed
6051 6052 6053 6054 6055 6056
		case may get strengthened in ::external_lock() to LOCK_X.
		Note that we MUST use a locking read in all data modifying
		SQL statements, because otherwise the execution would not be
		serializable, and also the results from the update could be
		unexpected if an obsolete consistent read view would be
		used. */
unknown's avatar
unknown committed
6057

6058 6059 6060 6061 6062 6063 6064 6065 6066 6067 6068 6069 6070 6071 6072 6073 6074 6075 6076 6077 6078
		if (srv_locks_unsafe_for_binlog &&
		    prebuilt->trx->isolation_level != TRX_ISO_SERIALIZABLE &&
		    (lock_type == TL_READ || lock_type == TL_READ_NO_INSERT) &&
		    thd->lex->sql_command != SQLCOM_SELECT &&
		    thd->lex->sql_command != SQLCOM_UPDATE_MULTI &&
		    thd->lex->sql_command != SQLCOM_DELETE_MULTI ) {

			/* In case we have innobase_locks_unsafe_for_binlog
			option set and isolation level of the transaction
			is not set to serializable and MySQL is doing
			INSERT INTO...SELECT without FOR UPDATE or IN
			SHARE MODE we use consistent read for select. 
			Similarly, in case of DELETE...SELECT and
			UPDATE...SELECT when these are not multi table.*/

			prebuilt->select_lock_type = LOCK_NONE;
			prebuilt->stored_select_lock_type = LOCK_NONE;
		} else {
			prebuilt->select_lock_type = LOCK_S;
			prebuilt->stored_select_lock_type = LOCK_S;
		}
unknown's avatar
unknown committed
6079

unknown's avatar
unknown committed
6080 6081 6082 6083 6084 6085 6086
	} else if (lock_type != TL_IGNORE) {

	        /* In ha_berkeley.cc there is a comment that MySQL
	        may in exceptional cases call this with TL_IGNORE also
	        when it is NOT going to release the lock. */

	        /* We set possible LOCK_X value in external_lock, not yet
6087
		here even if this would be SELECT ... FOR UPDATE */
unknown's avatar
unknown committed
6088

6089
		prebuilt->select_lock_type = LOCK_NONE;
unknown's avatar
unknown committed
6090
		prebuilt->stored_select_lock_type = LOCK_NONE;
6091 6092 6093 6094
	}

	if (lock_type != TL_IGNORE && lock.type == TL_UNLOCK) {

unknown's avatar
unknown committed
6095
    		/* If we are not doing a LOCK TABLE or DISCARD/IMPORT
6096
		TABLESPACE or TRUNCATE TABLE, then allow multiple writers */
6097 6098

    		if ((lock_type >= TL_WRITE_CONCURRENT_INSERT &&
unknown's avatar
unknown committed
6099
	 	    lock_type <= TL_WRITE) && !thd->in_lock_tables
6100 6101
		    && !thd->tablespace_op
		    && thd->lex->sql_command != SQLCOM_TRUNCATE) {
6102 6103 6104 6105

      			lock_type = TL_WRITE_ALLOW_WRITE;
      		}

unknown's avatar
unknown committed
6106 6107 6108 6109 6110 6111 6112 6113 6114 6115
		/* In queries of type INSERT INTO t1 SELECT ... FROM t2 ...
		MySQL would use the lock TL_READ_NO_INSERT on t2, and that
		would conflict with TL_WRITE_ALLOW_WRITE, blocking all inserts
		to t2. Convert the lock to a normal read lock to allow
		concurrent inserts to t2. */
      		
		if (lock_type == TL_READ_NO_INSERT && !thd->in_lock_tables) {
			lock_type = TL_READ;
		}
		
6116 6117 6118 6119
 		lock.type=lock_type;
  	}

  	*to++= &lock;
6120

6121 6122 6123
	return(to);
}

6124
/***********************************************************************
unknown's avatar
unknown committed
6125 6126
This function initializes the auto-inc counter if it has not been
initialized yet. This function does not change the value of the auto-inc
unknown's avatar
unknown committed
6127
counter if it already has been initialized. In paramete ret returns
unknown's avatar
unknown committed
6128
the value of the auto-inc counter. */
6129

unknown's avatar
unknown committed
6130 6131 6132 6133 6134 6135
int
ha_innobase::innobase_read_and_init_auto_inc(
/*=========================================*/
				/* out: 0 or error code: deadlock or
				lock wait timeout */
	longlong*	ret)	/* out: auto-inc value */
6136
{
unknown's avatar
unknown committed
6137
  	row_prebuilt_t* prebuilt	= (row_prebuilt_t*) innobase_prebuilt;
unknown's avatar
unknown committed
6138
    	longlong        auto_inc;
unknown's avatar
unknown committed
6139
  	int     	error;
6140

unknown's avatar
unknown committed
6141
  	ut_a(prebuilt);
unknown's avatar
unknown committed
6142
	ut_a(prebuilt->trx ==
6143
                (trx_t*) current_thd->ha_data[innobase_hton.slot]);
unknown's avatar
unknown committed
6144 6145
	ut_a(prebuilt->table);
	
unknown's avatar
unknown committed
6146 6147 6148 6149 6150
	/* In case MySQL calls this in the middle of a SELECT query, release
	possible adaptive hash latch to avoid deadlocks of threads */

	trx_search_latch_release_if_reserved(prebuilt->trx);

unknown's avatar
unknown committed
6151
	auto_inc = dict_table_autoinc_read(prebuilt->table);
unknown's avatar
unknown committed
6152

unknown's avatar
unknown committed
6153 6154 6155 6156 6157 6158
	if (auto_inc != 0) {
		/* Already initialized */
		*ret = auto_inc;
	
		return(0);
	}
6159

unknown's avatar
unknown committed
6160
	error = row_lock_table_autoinc_for_mysql(prebuilt);
unknown's avatar
unknown committed
6161

unknown's avatar
unknown committed
6162 6163
	if (error != DB_SUCCESS) {
		error = convert_error_code_to_mysql(error, user_thd);
6164

unknown's avatar
unknown committed
6165 6166
		goto func_exit;
	}	
unknown's avatar
unknown committed
6167

unknown's avatar
unknown committed
6168 6169
	/* Check again if someone has initialized the counter meanwhile */
	auto_inc = dict_table_autoinc_read(prebuilt->table);
unknown's avatar
unknown committed
6170

unknown's avatar
unknown committed
6171 6172 6173 6174
	if (auto_inc != 0) {
		*ret = auto_inc;
	
		return(0);
unknown's avatar
unknown committed
6175
	}
6176

unknown's avatar
unknown committed
6177
  	(void) extra(HA_EXTRA_KEYREAD);
6178
  	index_init(table->s->next_number_index);
unknown's avatar
unknown committed
6179 6180 6181 6182 6183 6184 6185

	/* We use an exclusive lock when we read the max key value from the
  	auto-increment column index. This is because then build_template will
  	advise InnoDB to fetch all columns. In SHOW TABLE STATUS the query
  	id of the auto-increment column is not changed, and previously InnoDB
  	did not fetch it, causing SHOW TABLE STATUS to show wrong values
  	for the autoinc column. */
6186

unknown's avatar
unknown committed
6187
  	prebuilt->select_lock_type = LOCK_X;
6188

unknown's avatar
unknown committed
6189 6190 6191
  	/* Play safe and also give in another way the hint to fetch
  	all columns in the key: */
  	
unknown's avatar
unknown committed
6192
	prebuilt->hint_need_to_fetch_extra_cols = ROW_RETRIEVE_ALL_COLS;
6193

unknown's avatar
unknown committed
6194
	prebuilt->trx->mysql_n_tables_locked += 1;
6195
  
unknown's avatar
unknown committed
6196
	error = index_last(table->record[1]);
6197

unknown's avatar
unknown committed
6198
  	if (error) {
unknown's avatar
unknown committed
6199 6200 6201 6202 6203 6204 6205 6206 6207 6208 6209
		if (error == HA_ERR_END_OF_FILE) {
			/* The table was empty, initialize to 1 */
			auto_inc = 1;

			error = 0;
		} else {
			/* Deadlock or a lock wait timeout */
  			auto_inc = -1;

  			goto func_exit;
  		}
unknown's avatar
unknown committed
6210
  	} else {
unknown's avatar
unknown committed
6211 6212
		/* Initialize to max(col) + 1 */
    		auto_inc = (longlong) table->next_number_field->
6213
                        	val_int_offset(table->s->rec_buff_length) + 1;
unknown's avatar
unknown committed
6214
  	}
6215

unknown's avatar
unknown committed
6216 6217 6218
	dict_table_autoinc_initialize(prebuilt->table, auto_inc);

func_exit:
unknown's avatar
unknown committed
6219
  	(void) extra(HA_EXTRA_NO_KEYREAD);
6220

unknown's avatar
unknown committed
6221 6222 6223 6224 6225 6226 6227 6228 6229 6230 6231 6232 6233
	index_end();

	*ret = auto_inc;

  	return(error);
}

/***********************************************************************
This function initializes the auto-inc counter if it has not been
initialized yet. This function does not change the value of the auto-inc
counter if it already has been initialized. Returns the value of the
auto-inc counter. */

6234
ulonglong
unknown's avatar
unknown committed
6235 6236 6237 6238 6239 6240 6241 6242 6243 6244 6245 6246
ha_innobase::get_auto_increment()
/*=============================*/
                         /* out: auto-increment column value, -1 if error
                         (deadlock or lock wait timeout) */
{
  	longlong        nr;
  	int     	error;
	
	error = innobase_read_and_init_auto_inc(&nr);

	if (error) {

unknown's avatar
unknown committed
6247
          	return(~(ulonglong) 0);
unknown's avatar
unknown committed
6248
	}
6249

6250
	return((ulonglong) nr);
6251 6252
}

unknown's avatar
unknown committed
6253 6254 6255 6256
/***********************************************************************
Compares two 'refs'. A 'ref' is the (internal) primary key value of the row.
If there is no explicitly declared non-null unique key or a primary key, then
InnoDB internally uses the row id as the primary key. */
unknown's avatar
unknown committed
6257

6258 6259
int
ha_innobase::cmp_ref(
unknown's avatar
unknown committed
6260 6261 6262 6263 6264 6265 6266
/*=================*/
				/* out: < 0 if ref1 < ref2, 0 if equal, else
				> 0 */
	const mysql_byte* ref1,	/* in: an (internal) primary key value in the
				MySQL key value format */
	const mysql_byte* ref2)	/* in: an (internal) primary key value in the
				MySQL key value format */
6267
{
unknown's avatar
unknown committed
6268
	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;
6269
	enum_field_types mysql_type;
unknown's avatar
unknown committed
6270 6271 6272 6273 6274 6275 6276 6277 6278 6279 6280 6281 6282 6283 6284 6285 6286 6287 6288 6289 6290
	Field*		field;
	KEY_PART_INFO*	key_part;
	KEY_PART_INFO*	key_part_end;
	uint		len1;
	uint		len2;
	int 		result;

	if (prebuilt->clust_index_was_generated) {
		/* The 'ref' is an InnoDB row id */

		return(memcmp(ref1, ref2, DATA_ROW_ID_LEN));
	}

	/* Do a type-aware comparison of primary key fields. PK fields
	are always NOT NULL, so no checks for NULL are performed. */

	key_part = table->key_info[table->s->primary_key].key_part;

	key_part_end = key_part
			+ table->key_info[table->s->primary_key].key_parts;

6291 6292 6293
	for (; key_part != key_part_end; ++key_part) {
		field = key_part->field;
		mysql_type = field->type();
unknown's avatar
unknown committed
6294

6295 6296 6297 6298 6299
		if (mysql_type == FIELD_TYPE_TINY_BLOB
		    || mysql_type == FIELD_TYPE_MEDIUM_BLOB
		    || mysql_type == FIELD_TYPE_BLOB
		    || mysql_type == FIELD_TYPE_LONG_BLOB) {
		    
unknown's avatar
unknown committed
6300 6301 6302 6303 6304 6305
			/* In the MySQL key value format, a column prefix of
			a BLOB is preceded by a 2-byte length field */

			len1 = innobase_read_from_2_little_endian(ref1);
			len2 = innobase_read_from_2_little_endian(ref2);

6306 6307
			ref1 += 2;
			ref2 += 2;
unknown's avatar
unknown committed
6308 6309
			result = ((Field_blob*)field)->cmp(
						    (const char*)ref1, len1,
6310 6311
			                            (const char*)ref2, len2);
		} else {
unknown's avatar
unknown committed
6312 6313 6314 6315 6316 6317 6318
			result = field->cmp((const char*)ref1,
					    (const char*)ref2);
		}

		if (result) {

			return(result);
6319 6320 6321 6322 6323
		}

		ref1 += key_part->length;
		ref2 += key_part->length;
	}
unknown's avatar
unknown committed
6324 6325

	return(0);
6326 6327
}

unknown's avatar
unknown committed
6328 6329
char*
ha_innobase::get_mysql_bin_log_name()
unknown's avatar
unknown committed
6330
{
unknown's avatar
unknown committed
6331
	return(trx_sys_mysql_bin_log_name);
unknown's avatar
unknown committed
6332 6333
}

unknown's avatar
unknown committed
6334 6335
/* Returns the value of trx_sys_mysql_bin_log_pos converted to ulonglong. */
ulonglong
ha_innobase::get_mysql_bin_log_pos()
{
	/* trx... is ib_longlong, which is a typedef for a 64-bit integer
	(__int64 or longlong) so it's ok to cast it to ulonglong. */

	ulonglong	log_pos = trx_sys_mysql_bin_log_pos;

	return(log_pos);
}

6343
extern "C" {
6344
/**********************************************************************
unknown's avatar
unknown committed
6345 6346 6347 6348 6349 6350 6351
This function is used to find the storage length in bytes of the first n
characters for prefix indexes using a multibyte character set. The function
finds charset information and returns length of prefix_len characters in the
index field in bytes.

NOTE: the prototype of this function is copied to data0type.c! If you change
this function, you MUST change also data0type.c! */
6352

unknown's avatar
unknown committed
6353 6354 6355 6356 6357
ulint
innobase_get_at_most_n_mbchars(
/*===========================*/
				/* out: number of bytes occupied by the first
				n characters */
6358
	ulint charset_id,	/* in: character set id */
unknown's avatar
unknown committed
6359 6360 6361 6362 6363
	ulint prefix_len,	/* in: prefix length in bytes of the index
				(this has to be divided by mbmaxlen to get the
				number of CHARACTERS n in the prefix) */
	ulint data_len,         /* in: length of the string in bytes */
	const char* str)	/* in: character string */
6364
{
6365
	ulint char_length;	/* character length in bytes */
unknown's avatar
unknown committed
6366
	ulint n_chars;		/* number of characters in prefix */
6367
	CHARSET_INFO* charset;	/* charset used in the field */
6368

unknown's avatar
unknown committed
6369
	charset = get_charset(charset_id, MYF(MY_WME));
6370

6371 6372
	ut_ad(charset);
	ut_ad(charset->mbmaxlen);
6373

unknown's avatar
unknown committed
6374
	/* Calculate how many characters at most the prefix index contains */
6375

unknown's avatar
unknown committed
6376
	n_chars = prefix_len / charset->mbmaxlen;
6377

unknown's avatar
unknown committed
6378 6379 6380
	/* If the charset is multi-byte, then we must find the length of the
	first at most n chars in the string. If the string contains less
	characters than n, then we return the length to the end of the last
unknown's avatar
unknown committed
6381
	character. */
6382

unknown's avatar
unknown committed
6383 6384
	if (charset->mbmaxlen > 1) {
		/* my_charpos() returns the byte length of the first n_chars
unknown's avatar
unknown committed
6385 6386 6387 6388 6389 6390 6391 6392 6393 6394 6395 6396 6397 6398 6399 6400
		characters, or a value bigger than the length of str, if
		there were not enough full characters in str.

		Why does the code below work:
		Suppose that we are looking for n UTF-8 characters.

		1) If the string is long enough, then the prefix contains at
		least n complete UTF-8 characters + maybe some extra
		characters + an incomplete UTF-8 character. No problem in
		this case. The function returns the pointer to the
		end of the nth character.

		2) If the string is not long enough, then the string contains
		the complete value of a column, that is, only complete UTF-8
		characters, and we can store in the column prefix index the
		whole string. */
unknown's avatar
unknown committed
6401

unknown's avatar
unknown committed
6402 6403 6404 6405 6406
		char_length = my_charpos(charset, str,
						str + data_len, n_chars);
		if (char_length > data_len) {
			char_length = data_len;
		}		
unknown's avatar
unknown committed
6407
	} else {
unknown's avatar
unknown committed
6408 6409 6410 6411 6412
		if (data_len < prefix_len) {
			char_length = data_len;
		} else {
			char_length = prefix_len;
		}
6413
	}
6414

unknown's avatar
unknown committed
6415
	return(char_length);
6416 6417 6418
}
}

6419 6420
extern "C" {
/**********************************************************************
Classifies the SQL statement of the current thread. The result is true when

1) the statement is REPLACE, REPLACE ... SELECT, or LOAD DATA INFILE with
the REPLACE duplicate handling, or

2) the statement is INSERT with ON DUPLICATE KEY UPDATE.

In every other case the result is false.

NOTE that /mysql/innobase/row/row0ins.c must contain the
prototype for this function ! */

ibool
innobase_query_is_update(void)
/*==========================*/
				/* out: 1 if one of the cases above holds,
				0 otherwise */
{
	THD*	thd = (THD *)innobase_current_thd();

	switch (thd->lex->sql_command) {
	case SQLCOM_REPLACE:
	case SQLCOM_REPLACE_SELECT:
		return(1);

	case SQLCOM_LOAD:
		/* LOAD DATA counts only with the REPLACE option */
		return(thd->lex->duplicates == DUP_REPLACE ? 1 : 0);

	case SQLCOM_INSERT:
		/* INSERT counts only with ON DUPLICATE KEY UPDATE */
		return(thd->lex->duplicates == DUP_UPDATE ? 1 : 0);

	default:
		return(0);
	}
}
}

6458 6459 6460
/***********************************************************************
This function is used to prepare X/Open XA distributed transaction   */

6461 6462 6463
int 
innobase_xa_prepare(
/*================*/
6464 6465 6466 6467 6468 6469 6470
			/* out: 0 or error number */
	THD*	thd,	/* in: handle to the MySQL thread of the user
			whose XA transaction should be prepared */
	bool	all)	/* in: TRUE - commit transaction
			FALSE - the current SQL statement ended */
{
	int error = 0;
unknown's avatar
unknown committed
6471 6472 6473 6474
        trx_t* trx = check_trx_exists(thd);

        if (thd->lex->sql_command != SQLCOM_XA_PREPARE) {

unknown's avatar
unknown committed
6475 6476 6477 6478 6479 6480 6481 6482 6483 6484 6485 6486 6487 6488 6489 6490 6491 6492 6493
                /* For ibbackup to work the order of transactions in binlog
                and InnoDB must be the same. Consider the situation

                  thread1> prepare; write to binlog; ...
                          <context switch>
                  thread2> prepare; write to binlog; commit
                  thread1>                           ... commit

                To ensure this will not happen we're taking the mutex on
                prepare, and releasing it on commit.

                Note: only do it for normal commits, done via ha_commit_trans.
                If 2pc protocol is executed by external transaction
                coordinator, it will be just a regular MySQL client
                executing XA PREPARE and XA COMMIT commands.
                In this case we cannot know how many minutes or hours
                will be between XA PREPARE and XA COMMIT, and we don't want
                to block for undefined period of time.
                */
unknown's avatar
unknown committed
6494 6495 6496
                pthread_mutex_lock(&prepare_commit_mutex);
                trx->active_trans = 2;
        }
6497

6498 6499 6500 6501 6502
	if (!thd->variables.innodb_support_xa) {

		return(0);
	}

6503
        trx->xid=thd->transaction.xid;
6504 6505 6506 6507 6508 6509 6510 6511 6512 6513

	/* Release a possible FIFO ticket and search latch. Since we will
	reserve the kernel mutex, we have to release the search system latch
	first to obey the latching order. */

	innobase_release_stat_resources(trx);

	if (trx->active_trans == 0 && trx->conc_state != TRX_NOT_STARTED) {

		fprintf(stderr,
6514
"InnoDB: Error: trx->active_trans == 0\n"
6515 6516 6517
"InnoDB: but trx->conc_state != TRX_NOT_STARTED\n");
	}

unknown's avatar
unknown committed
6518 6519
	if (all
	    || (!(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)))) {
unknown's avatar
unknown committed
6520 6521 6522

                /* We were instructed to prepare the whole transaction, or
                this is an SQL statement end and autocommit is on */
6523

unknown's avatar
unknown committed
6524
                ut_ad(trx->active_trans);
unknown's avatar
unknown committed
6525

6526 6527 6528 6529 6530 6531 6532 6533
		error = trx_prepare_for_mysql(trx);
	} else {
	        /* We just mark the SQL statement ended and do not do a
		transaction prepare */

		if (trx->auto_inc_lock) {
			/* If we had reserved the auto-inc lock for some
			table in this SQL statement we release it now */
unknown's avatar
unknown committed
6534

6535 6536 6537 6538 6539 6540 6541 6542 6543 6544 6545 6546 6547 6548 6549 6550 6551 6552 6553 6554
			row_unlock_table_autoinc_for_mysql(trx);
		}
		/* Store the current undo_no of the transaction so that we
		know where to roll back if we have to roll back the next
		SQL statement */

		trx_mark_sql_stat_end(trx);
	}

	/* Tell the InnoDB server that there might be work for utility
	threads: */

	srv_active_wake_master_thread();

        return error;
}

/***********************************************************************
This function is used to recover X/Open XA distributed transactions   */

6555 6556 6557
int 
innobase_xa_recover(
/*================*/
6558 6559 6560 6561 6562 6563
				/* out: number of prepared transactions 
				stored in xid_list */
	XID*    xid_list, 	/* in/out: prepared transactions */
	uint	len)		/* in: number of slots in xid_list */
{
	if (len == 0 || xid_list == NULL) {
unknown's avatar
unknown committed
6564 6565

		return(0);
6566 6567
	}

unknown's avatar
unknown committed
6568
	return(trx_recover_for_mysql(xid_list, len));
6569 6570 6571 6572 6573 6574
}

/***********************************************************************
This function is used to commit one X/Open XA distributed transaction
which is in the prepared state */

6575 6576 6577
int 
innobase_commit_by_xid(
/*===================*/
6578
			/* out: 0 or error number */
unknown's avatar
unknown committed
6579
	XID*	xid)	/* in: X/Open XA transaction identification */
6580 6581 6582 6583 6584 6585 6586 6587 6588 6589 6590 6591 6592 6593 6594 6595 6596 6597
{
	trx_t*	trx;

	trx = trx_get_trx_by_xid(xid);

	if (trx) {
		innobase_commit_low(trx);
		
		return(XA_OK);
	} else {
		return(XAER_NOTA);
	}
}

/***********************************************************************
This function is used to rollback one X/Open XA distributed transaction
which is in the prepared state */

6598 6599 6600
int 
innobase_rollback_by_xid(
/*=====================*/
6601
			/* out: 0 or error number */
unknown's avatar
unknown committed
6602
	XID	*xid)	/* in: X/Open XA transaction idenfification */
6603 6604 6605 6606 6607 6608 6609 6610 6611 6612 6613 6614
{
	trx_t*	trx;

	trx = trx_get_trx_by_xid(xid);

	if (trx) {
		return(innobase_rollback_trx(trx));
	} else {
		return(XAER_NOTA);
	}
}

6615
#endif /* HAVE_INNOBASE_DB */