NULL MERGE this ChangeSet to 5.1

Apply innodb-5.0-ss1696 snapshot

Fixes:
- Bug#20090: InnoDB: Error: trying to declare trx to enter InnoDB
- Bug#23710: crash_commit_before fails if innodb_file_per_table=1
  At InnoDB startup, treat the case where the log scan went beyond
  checkpoint_lsn as a crash and initiate the crash recovery code path.
- Bug#28781: InnoDB increments auto-increment value incorrectly with ON DUPLICATE KEY UPDATE
  We need to do some special AUTOINC handling for the following case:
  INSERT INTO t (c1,c2) VALUES(x,y) ON DUPLICATE KEY UPDATE ...
  We need to use the AUTOINC counter that was actually used by
  MySQL in the UPDATE statement, which can be different from the
  value used in the INSERT statement.
- Bug#29097: fsp_get_available_space_in_free_extents() is capped at 4TB
  Fix by typecasting the variables before multiplying them, so that the
  result of the multiplication is of type "unsigned long long".
- Bug#29155: Innodb "Parallel recovery" is not prevented
  Fix by enabling file locking on FreeBSD.  It had been disabled because
  InnoDB refused to start on FreeBSD with LinuxThreads, but now it
  starts just fine.
parent e2d64f28
...@@ -2806,7 +2806,7 @@ will be able to insert new data to the database without running out the ...@@ -2806,7 +2806,7 @@ will be able to insert new data to the database without running out the
tablespace. Only free extents are taken into account and we also subtract tablespace. Only free extents are taken into account and we also subtract
the safety margin required by the above function fsp_reserve_free_extents. */ the safety margin required by the above function fsp_reserve_free_extents. */
ulint ullint
fsp_get_available_space_in_free_extents( fsp_get_available_space_in_free_extents(
/*====================================*/ /*====================================*/
/* out: available space in kB */ /* out: available space in kB */
...@@ -2872,8 +2872,9 @@ fsp_get_available_space_in_free_extents( ...@@ -2872,8 +2872,9 @@ fsp_get_available_space_in_free_extents(
return(0); return(0);
} }
return(((n_free - reserve) * FSP_EXTENT_SIZE) return((ullint)(n_free - reserve)
* (UNIV_PAGE_SIZE / 1024)); * FSP_EXTENT_SIZE
* (UNIV_PAGE_SIZE / 1024));
} }
/************************************************************************ /************************************************************************
......
...@@ -245,7 +245,7 @@ will be able to insert new data to the database without running out the ...@@ -245,7 +245,7 @@ will be able to insert new data to the database without running out the
tablespace. Only free extents are taken into account and we also subtract tablespace. Only free extents are taken into account and we also subtract
the safety margin required by the above function fsp_reserve_free_extents. */ the safety margin required by the above function fsp_reserve_free_extents. */
ulint ullint
fsp_get_available_space_in_free_extents( fsp_get_available_space_in_free_extents(
/*====================================*/ /*====================================*/
/* out: available space in kB */ /* out: available space in kB */
......
...@@ -195,6 +195,8 @@ typedef __int64 ib_longlong; ...@@ -195,6 +195,8 @@ typedef __int64 ib_longlong;
typedef longlong ib_longlong; typedef longlong ib_longlong;
#endif #endif
typedef unsigned long long int ullint;
#ifndef __WIN__ #ifndef __WIN__
#if SIZEOF_LONG != SIZEOF_VOIDP #if SIZEOF_LONG != SIZEOF_VOIDP
#error "Error: InnoDB's ulint must be of the same size as void*" #error "Error: InnoDB's ulint must be of the same size as void*"
......
...@@ -57,6 +57,16 @@ ibool recv_needed_recovery = FALSE; ...@@ -57,6 +57,16 @@ ibool recv_needed_recovery = FALSE;
ibool recv_lsn_checks_on = FALSE; ibool recv_lsn_checks_on = FALSE;
/* There are two conditions under which we scan the logs, the first
is normal startup and the second is when we do a recovery from an
archive.
This flag is set if we are doing a scan from the last checkpoint during
startup. If we find log entries that were written after the last checkpoint
we know that the server was not cleanly shut down. We must then initialize
the crash recovery environment before attempting to store these entries in
the log hash table. */
ibool recv_log_scan_is_startup_type = FALSE;
/* If the following is TRUE, the buffer pool file pages must be invalidated /* If the following is TRUE, the buffer pool file pages must be invalidated
after recovery and no ibuf operations are allowed; this becomes TRUE if after recovery and no ibuf operations are allowed; this becomes TRUE if
the log record hash table becomes too full, and log records must be merged the log record hash table becomes too full, and log records must be merged
...@@ -99,6 +109,16 @@ the recovery failed and the database may be corrupt. */ ...@@ -99,6 +109,16 @@ the recovery failed and the database may be corrupt. */
dulint recv_max_page_lsn; dulint recv_max_page_lsn;
/* prototypes */
/***********************************************************
Initialize crash recovery environment. Can be called iff
recv_needed_recovery == FALSE.
This is only a forward declaration; the function is static and its
definition appears further down in this file. */
static
void
recv_init_crash_recovery(void);
/*===========================*/
/************************************************************ /************************************************************
Creates the recovery system. */ Creates the recovery system. */
...@@ -2438,6 +2458,23 @@ recv_scan_log_recs( ...@@ -2438,6 +2458,23 @@ recv_scan_log_recs(
if (ut_dulint_cmp(scanned_lsn, recv_sys->scanned_lsn) > 0) { if (ut_dulint_cmp(scanned_lsn, recv_sys->scanned_lsn) > 0) {
/* We have found more entries. If this scan is
of startup type, we must initiate crash recovery
environment before parsing these log records. */
if (recv_log_scan_is_startup_type
&& !recv_needed_recovery) {
fprintf(stderr,
"InnoDB: Log scan progressed"
" past the checkpoint lsn %lu %lu\n",
(ulong) ut_dulint_get_high(
recv_sys->scanned_lsn),
(ulong) ut_dulint_get_low(
recv_sys->scanned_lsn));
recv_init_crash_recovery();
}
/* We were able to find more log data: add it to the /* We were able to find more log data: add it to the
parsing buffer if parse_start_lsn is already parsing buffer if parse_start_lsn is already
non-zero */ non-zero */
...@@ -2557,6 +2594,48 @@ recv_group_scan_log_recs( ...@@ -2557,6 +2594,48 @@ recv_group_scan_log_recs(
#endif /* UNIV_DEBUG */ #endif /* UNIV_DEBUG */
} }
/***********************************************************
Puts the startup into crash recovery mode. Sets recv_needed_recovery,
reports the unclean shutdown on stderr, calls
fil_load_single_table_tablespaces() and, unless srv_force_recovery asks
to skip log redo, restores pages via the doublewrite buffer.
May only be called while recv_needed_recovery == FALSE; this is asserted. */
static
void
recv_init_crash_recovery(void)
/*==========================*/
{
	/* Initializing crash recovery a second time would be a bug. */
	ut_a(!recv_needed_recovery);

	recv_needed_recovery = TRUE;

	ut_print_timestamp(stderr);

	/* The messages contain no conversion specifiers, so they are
	written out verbatim. */
	fputs(" InnoDB: Database was not shut down normally!\n"
	      "InnoDB: Starting crash recovery.\n",
	      stderr);

	fputs("InnoDB: Reading tablespace information from the .ibd files...\n",
	      stderr);

	fil_load_single_table_tablespaces();

	if (srv_force_recovery >= SRV_FORCE_NO_LOG_REDO) {
		/* Log redo is being skipped: leave the doublewrite
		buffer untouched. */
		return;
	}

	/* With the doublewrite method in use, look for half-written
	pages in the data files and restore them from the doublewrite
	buffer where possible. */
	fputs("InnoDB: Restoring possible half-written data pages from"
	      " the doublewrite\n"
	      "InnoDB: buffer...\n",
	      stderr);

	trx_sys_doublewrite_init_or_restore_pages(TRUE);
}
/************************************************************ /************************************************************
Recovers from a checkpoint. When this function returns, the database is able Recovers from a checkpoint. When this function returns, the database is able
to start processing of new user transactions, but the function to start processing of new user transactions, but the function
...@@ -2681,72 +2760,6 @@ recv_recovery_from_checkpoint_start( ...@@ -2681,72 +2760,6 @@ recv_recovery_from_checkpoint_start(
recv_sys->recovered_lsn = checkpoint_lsn; recv_sys->recovered_lsn = checkpoint_lsn;
srv_start_lsn = checkpoint_lsn; srv_start_lsn = checkpoint_lsn;
/* NOTE: we always do a 'recovery' at startup, but only if
there is something wrong we will print a message to the
user about recovery: */
if (ut_dulint_cmp(checkpoint_lsn, max_flushed_lsn) != 0
|| ut_dulint_cmp(checkpoint_lsn, min_flushed_lsn) != 0) {
if (ut_dulint_cmp(checkpoint_lsn, max_flushed_lsn)
< 0) {
fprintf(stderr,
"InnoDB: ##########################################################\n"
"InnoDB: WARNING!\n"
"InnoDB: The log sequence number in ibdata files is higher\n"
"InnoDB: than the log sequence number in the ib_logfiles! Are you sure\n"
"InnoDB: you are using the right ib_logfiles to start up the database?\n"
"InnoDB: Log sequence number in ib_logfiles is %lu %lu, log\n"
"InnoDB: sequence numbers stamped to ibdata file headers are between\n"
"InnoDB: %lu %lu and %lu %lu.\n"
"InnoDB: ##########################################################\n",
(ulong) ut_dulint_get_high(checkpoint_lsn),
(ulong) ut_dulint_get_low(checkpoint_lsn),
(ulong) ut_dulint_get_high(min_flushed_lsn),
(ulong) ut_dulint_get_low(min_flushed_lsn),
(ulong) ut_dulint_get_high(max_flushed_lsn),
(ulong) ut_dulint_get_low(max_flushed_lsn));
}
recv_needed_recovery = TRUE;
ut_print_timestamp(stderr);
fprintf(stderr,
" InnoDB: Database was not shut down normally!\n"
"InnoDB: Starting crash recovery.\n");
fprintf(stderr,
"InnoDB: Reading tablespace information from the .ibd files...\n");
fil_load_single_table_tablespaces();
/* If we are using the doublewrite method, we will
check if there are half-written pages in data files,
and restore them from the doublewrite buffer if
possible */
if (srv_force_recovery < SRV_FORCE_NO_LOG_REDO) {
fprintf(stderr,
"InnoDB: Restoring possible half-written data pages from the doublewrite\n"
"InnoDB: buffer...\n");
trx_sys_doublewrite_init_or_restore_pages(
TRUE);
}
ut_print_timestamp(stderr);
fprintf(stderr,
" InnoDB: Starting log scan based on checkpoint at\n"
"InnoDB: log sequence number %lu %lu.\n",
(ulong) ut_dulint_get_high(checkpoint_lsn),
(ulong) ut_dulint_get_low(checkpoint_lsn));
} else {
/* Init the doublewrite buffer memory structure */
trx_sys_doublewrite_init_or_restore_pages(FALSE);
}
} }
contiguous_lsn = ut_dulint_align_down(recv_sys->scanned_lsn, contiguous_lsn = ut_dulint_align_down(recv_sys->scanned_lsn,
...@@ -2798,7 +2811,9 @@ recv_recovery_from_checkpoint_start( ...@@ -2798,7 +2811,9 @@ recv_recovery_from_checkpoint_start(
group = UT_LIST_GET_NEXT(log_groups, group); group = UT_LIST_GET_NEXT(log_groups, group);
} }
while (group) { /* Set the flag to publish that we are doing startup scan. */
recv_log_scan_is_startup_type = (type == LOG_CHECKPOINT);
while (group) {
old_scanned_lsn = recv_sys->scanned_lsn; old_scanned_lsn = recv_sys->scanned_lsn;
recv_group_scan_log_recs(group, &contiguous_lsn, recv_group_scan_log_recs(group, &contiguous_lsn,
...@@ -2819,6 +2834,69 @@ recv_recovery_from_checkpoint_start( ...@@ -2819,6 +2834,69 @@ recv_recovery_from_checkpoint_start(
group = UT_LIST_GET_NEXT(log_groups, group); group = UT_LIST_GET_NEXT(log_groups, group);
} }
/* Done with startup scan. Clear the flag. */
recv_log_scan_is_startup_type = FALSE;
if (type == LOG_CHECKPOINT) {
/* NOTE: we always do a 'recovery' at startup, but only if
there is something wrong we will print a message to the
user about recovery: */
if (ut_dulint_cmp(checkpoint_lsn, max_flushed_lsn) != 0
|| ut_dulint_cmp(checkpoint_lsn, min_flushed_lsn) != 0) {
if (ut_dulint_cmp(checkpoint_lsn, max_flushed_lsn)
< 0) {
fprintf(stderr,
"InnoDB: #########################"
"#################################\n"
"InnoDB: "
"WARNING!\n"
"InnoDB: The log sequence number"
" in ibdata files is higher\n"
"InnoDB: than the log sequence number"
" in the ib_logfiles! Are you sure\n"
"InnoDB: you are using the right"
" ib_logfiles to start up"
" the database?\n"
"InnoDB: Log sequence number in"
" ib_logfiles is %lu %lu, log\n"
"InnoDB: sequence numbers stamped"
" to ibdata file headers are between\n"
"InnoDB: %lu %lu and %lu %lu.\n"
"InnoDB: #########################"
"#################################\n",
(ulong) ut_dulint_get_high(
checkpoint_lsn),
(ulong) ut_dulint_get_low(
checkpoint_lsn),
(ulong) ut_dulint_get_high(
min_flushed_lsn),
(ulong) ut_dulint_get_low(
min_flushed_lsn),
(ulong) ut_dulint_get_high(
max_flushed_lsn),
(ulong) ut_dulint_get_low(
max_flushed_lsn));
}
if (!recv_needed_recovery) {
fprintf(stderr,
"InnoDB: The log sequence number"
" in ibdata files does not match\n"
"InnoDB: the log sequence number"
" in the ib_logfiles!\n");
recv_init_crash_recovery();
}
}
if (!recv_needed_recovery) {
/* Init the doublewrite buffer memory structure */
trx_sys_doublewrite_init_or_restore_pages(FALSE);
}
}
/* We currently have only one log group */ /* We currently have only one log group */
if (ut_dulint_cmp(group_scanned_lsn, checkpoint_lsn) < 0) { if (ut_dulint_cmp(group_scanned_lsn, checkpoint_lsn) < 0) {
ut_print_timestamp(stderr); ut_print_timestamp(stderr);
...@@ -2871,16 +2949,9 @@ recv_recovery_from_checkpoint_start( ...@@ -2871,16 +2949,9 @@ recv_recovery_from_checkpoint_start(
recv_synchronize_groups(up_to_date_group); recv_synchronize_groups(up_to_date_group);
if (!recv_needed_recovery) { if (!recv_needed_recovery) {
if (ut_dulint_cmp(checkpoint_lsn, recv_sys->recovered_lsn) ut_a(ut_dulint_cmp(checkpoint_lsn,
!= 0) { recv_sys->recovered_lsn) == 0);
fprintf(stderr,
"InnoDB: Warning: we did not need to do crash recovery, but log scan\n"
"InnoDB: progressed past the checkpoint lsn %lu %lu up to lsn %lu %lu\n",
(ulong) ut_dulint_get_high(checkpoint_lsn),
(ulong) ut_dulint_get_low(checkpoint_lsn),
(ulong) ut_dulint_get_high(recv_sys->recovered_lsn),
(ulong) ut_dulint_get_low(recv_sys->recovered_lsn));
}
} else { } else {
srv_start_lsn = recv_sys->recovered_lsn; srv_start_lsn = recv_sys->recovered_lsn;
} }
......
...@@ -436,10 +436,9 @@ os_file_handle_error_no_exit( ...@@ -436,10 +436,9 @@ os_file_handle_error_no_exit(
#undef USE_FILE_LOCK #undef USE_FILE_LOCK
#define USE_FILE_LOCK #define USE_FILE_LOCK
#if defined(UNIV_HOTBACKUP) || defined(__WIN__) || defined(__FreeBSD__) || defined(__NETWARE__) #if defined(UNIV_HOTBACKUP) || defined(__WIN__) || defined(__NETWARE__)
/* InnoDB Hot Backup does not lock the data files. /* InnoDB Hot Backup does not lock the data files.
* On Windows, mandatory locking is used. * On Windows, mandatory locking is used.
* On FreeBSD with LinuxThreads, advisory locking does not work properly.
*/ */
# undef USE_FILE_LOCK # undef USE_FILE_LOCK
#endif #endif
......
...@@ -1151,7 +1151,7 @@ srv_conc_force_enter_innodb( ...@@ -1151,7 +1151,7 @@ srv_conc_force_enter_innodb(
srv_conc_n_threads++; srv_conc_n_threads++;
trx->declared_to_be_inside_innodb = TRUE; trx->declared_to_be_inside_innodb = TRUE;
trx->n_tickets_to_enter_innodb = 0; trx->n_tickets_to_enter_innodb = 1;
os_fast_mutex_unlock(&srv_conc_mutex); os_fast_mutex_unlock(&srv_conc_mutex);
} }
......
...@@ -1606,19 +1606,21 @@ trx_commit_for_mysql( ...@@ -1606,19 +1606,21 @@ trx_commit_for_mysql(
the transaction object does not have an InnoDB session object, and we the transaction object does not have an InnoDB session object, and we
set the dummy session that we use for all MySQL transactions. */ set the dummy session that we use for all MySQL transactions. */
mutex_enter(&kernel_mutex);
if (trx->sess == NULL) { if (trx->sess == NULL) {
/* Open a dummy session */ /* Open a dummy session */
if (!trx_dummy_sess) { if (!trx_dummy_sess) {
trx_dummy_sess = sess_open(); mutex_enter(&kernel_mutex);
if (!trx_dummy_sess) {
trx_dummy_sess = sess_open();
}
mutex_exit(&kernel_mutex);
} }
trx->sess = trx_dummy_sess; trx->sess = trx_dummy_sess;
} }
mutex_exit(&kernel_mutex);
trx_start_if_not_started(trx); trx_start_if_not_started(trx);
......
...@@ -3301,8 +3301,6 @@ ha_innobase::write_row( ...@@ -3301,8 +3301,6 @@ ha_innobase::write_row(
if (error == DB_DUPLICATE_KEY && auto_inc_used if (error == DB_DUPLICATE_KEY && auto_inc_used
&& (user_thd->lex->sql_command == SQLCOM_REPLACE && (user_thd->lex->sql_command == SQLCOM_REPLACE
|| user_thd->lex->sql_command == SQLCOM_REPLACE_SELECT || user_thd->lex->sql_command == SQLCOM_REPLACE_SELECT
|| (user_thd->lex->sql_command == SQLCOM_INSERT
&& user_thd->lex->duplicates == DUP_UPDATE)
|| (user_thd->lex->sql_command == SQLCOM_LOAD || (user_thd->lex->sql_command == SQLCOM_LOAD
&& user_thd->lex->duplicates == DUP_REPLACE))) { && user_thd->lex->duplicates == DUP_REPLACE))) {
...@@ -3533,6 +3531,27 @@ ha_innobase::update_row( ...@@ -3533,6 +3531,27 @@ ha_innobase::update_row(
error = row_update_for_mysql((byte*) old_row, prebuilt); error = row_update_for_mysql((byte*) old_row, prebuilt);
/* We need to do some special AUTOINC handling for the following case:
INSERT INTO t (c1,c2) VALUES(x,y) ON DUPLICATE KEY UPDATE ...
We need to use the AUTOINC counter that was actually used by
MySQL in the UPDATE statement, which can be different from the
value used in the INSERT statement.*/
if (error == DB_SUCCESS
&& table->next_number_field && new_row == table->record[0]
&& user_thd->lex->sql_command == SQLCOM_INSERT
&& user_thd->lex->duplicates == DUP_UPDATE) {
longlong auto_inc;
auto_inc = table->next_number_field->val_int();
if (auto_inc != 0) {
dict_table_autoinc_update(prebuilt->table, auto_inc);
}
}
innodb_srv_conc_exit_innodb(prebuilt->trx); innodb_srv_conc_exit_innodb(prebuilt->trx);
error = convert_error_code_to_mysql(error, user_thd); error = convert_error_code_to_mysql(error, user_thd);
...@@ -5609,9 +5628,9 @@ ha_innobase::update_table_comment( ...@@ -5609,9 +5628,9 @@ ha_innobase::update_table_comment(
mutex_enter_noninline(&srv_dict_tmpfile_mutex); mutex_enter_noninline(&srv_dict_tmpfile_mutex);
rewind(srv_dict_tmpfile); rewind(srv_dict_tmpfile);
fprintf(srv_dict_tmpfile, "InnoDB free: %lu kB", fprintf(srv_dict_tmpfile, "InnoDB free: %llu kB",
(ulong) fsp_get_available_space_in_free_extents( fsp_get_available_space_in_free_extents(
prebuilt->table->space)); prebuilt->table->space));
dict_print_info_on_foreign_keys(FALSE, srv_dict_tmpfile, dict_print_info_on_foreign_keys(FALSE, srv_dict_tmpfile,
prebuilt->trx, prebuilt->table); prebuilt->trx, prebuilt->table);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment