Apply snapshot innodb-51-ss1644

Fixes:
- Bug #23710: crash_commit_before fails if innodb_file_per_table=1
- Bug #28254: innodb crash if shutdown during innodb_table_monitor is running
- Bug #28604: innodb_force_recovery restricts data dump
- Bug #29097: fsp_get_available_space_in_free_extents() is capped at 4TB
- Bug #29155: Innodb "Parallel recovery" is not prevented
parent 2bc5179c
...@@ -55,7 +55,9 @@ noinst_HEADERS = include/btr0btr.h include/btr0btr.ic \ ...@@ -55,7 +55,9 @@ noinst_HEADERS = include/btr0btr.h include/btr0btr.ic \
include/ha0ha.ic include/hash0hash.h \ include/ha0ha.ic include/hash0hash.h \
include/hash0hash.ic include/ibuf0ibuf.h \ include/hash0hash.ic include/ibuf0ibuf.h \
include/ibuf0ibuf.ic include/ibuf0types.h \ include/ibuf0ibuf.ic include/ibuf0types.h \
include/lock0iter.h \
include/lock0lock.h include/lock0lock.ic \ include/lock0lock.h include/lock0lock.ic \
include/lock0priv.h include/lock0priv.ic \
include/lock0types.h include/log0log.h \ include/lock0types.h include/log0log.h \
include/log0log.ic include/log0recv.h \ include/log0log.ic include/log0recv.h \
include/log0recv.ic include/mach0data.h \ include/log0recv.ic include/mach0data.h \
...@@ -129,7 +131,8 @@ libinnobase_a_SOURCES = btr/btr0btr.c btr/btr0cur.c btr/btr0pcur.c \ ...@@ -129,7 +131,8 @@ libinnobase_a_SOURCES = btr/btr0btr.c btr/btr0cur.c btr/btr0pcur.c \
eval/eval0eval.c eval/eval0proc.c \ eval/eval0eval.c eval/eval0proc.c \
fil/fil0fil.c fsp/fsp0fsp.c fut/fut0fut.c \ fil/fil0fil.c fsp/fsp0fsp.c fut/fut0fut.c \
fut/fut0lst.c ha/ha0ha.c ha/hash0hash.c \ fut/fut0lst.c ha/ha0ha.c ha/hash0hash.c \
ibuf/ibuf0ibuf.c lock/lock0lock.c \ ibuf/ibuf0ibuf.c lock/lock0iter.c \
lock/lock0lock.c \
log/log0log.c log/log0recv.c mach/mach0data.c \ log/log0log.c log/log0recv.c mach/mach0data.c \
mem/mem0mem.c mem/mem0pool.c mtr/mtr0log.c \ mem/mem0mem.c mem/mem0pool.c mtr/mtr0log.c \
mtr/mtr0mtr.c os/os0file.c os/os0proc.c \ mtr/mtr0mtr.c os/os0file.c os/os0proc.c \
......
...@@ -903,8 +903,7 @@ buf_block_make_young( ...@@ -903,8 +903,7 @@ buf_block_make_young(
/* Note that we read freed_page_clock's without holding any mutex: /* Note that we read freed_page_clock's without holding any mutex:
this is allowed since the result is used only in heuristics */ this is allowed since the result is used only in heuristics */
if (buf_pool->freed_page_clock >= block->freed_page_clock if (buf_block_peek_if_too_old(block)) {
+ 1 + (buf_pool->curr_size / 4)) {
mutex_enter(&buf_pool->mutex); mutex_enter(&buf_pool->mutex);
/* There has been freeing activity in the LRU list: /* There has been freeing activity in the LRU list:
...@@ -1648,6 +1647,15 @@ buf_page_init( ...@@ -1648,6 +1647,15 @@ buf_page_init(
block->lock_hash_val = lock_rec_hash(space, offset); block->lock_hash_val = lock_rec_hash(space, offset);
#ifdef UNIV_DEBUG_VALGRIND
if (!space) {
/* Silence valid Valgrind warnings about uninitialized
data being written to data files. There are some unused
bytes on some pages that InnoDB does not initialize. */
UNIV_MEM_VALID(block->frame, UNIV_PAGE_SIZE);
}
#endif /* UNIV_DEBUG_VALGRIND */
/* Insert into the hash table of file pages */ /* Insert into the hash table of file pages */
if (buf_page_hash_get(space, offset)) { if (buf_page_hash_get(space, offset)) {
......
...@@ -244,7 +244,15 @@ buf_LRU_search_and_free_block( ...@@ -244,7 +244,15 @@ buf_LRU_search_and_free_block(
frame at all */ frame at all */
if (block->frame) { if (block->frame) {
/* The page was declared uninitialized
by buf_LRU_block_remove_hashed_page().
We need to flag the contents of the
page valid (which it still is) in
order to avoid bogus Valgrind
warnings. */
UNIV_MEM_VALID(block->frame, UNIV_PAGE_SIZE);
btr_search_drop_page_hash_index(block->frame); btr_search_drop_page_hash_index(block->frame);
UNIV_MEM_INVALID(block->frame, UNIV_PAGE_SIZE);
} }
ut_a(block->buf_fix_count == 0); ut_a(block->buf_fix_count == 0);
...@@ -449,6 +457,7 @@ loop: ...@@ -449,6 +457,7 @@ loop:
mutex_enter(&block->mutex); mutex_enter(&block->mutex);
block->state = BUF_BLOCK_READY_FOR_USE; block->state = BUF_BLOCK_READY_FOR_USE;
UNIV_MEM_ALLOC(block->frame, UNIV_PAGE_SIZE);
mutex_exit(&block->mutex); mutex_exit(&block->mutex);
...@@ -864,6 +873,7 @@ buf_LRU_block_free_non_file_page( ...@@ -864,6 +873,7 @@ buf_LRU_block_free_non_file_page(
block->state = BUF_BLOCK_NOT_USED; block->state = BUF_BLOCK_NOT_USED;
UNIV_MEM_ALLOC(block->frame, UNIV_PAGE_SIZE);
#ifdef UNIV_DEBUG #ifdef UNIV_DEBUG
/* Wipe contents of page to reveal possible stale pointers to it */ /* Wipe contents of page to reveal possible stale pointers to it */
memset(block->frame, '\0', UNIV_PAGE_SIZE); memset(block->frame, '\0', UNIV_PAGE_SIZE);
...@@ -871,6 +881,8 @@ buf_LRU_block_free_non_file_page( ...@@ -871,6 +881,8 @@ buf_LRU_block_free_non_file_page(
UT_LIST_ADD_FIRST(free, buf_pool->free, block); UT_LIST_ADD_FIRST(free, buf_pool->free, block);
block->in_free_list = TRUE; block->in_free_list = TRUE;
UNIV_MEM_FREE(block->frame, UNIV_PAGE_SIZE);
if (srv_use_awe && block->frame) { if (srv_use_awe && block->frame) {
/* Add to the list of mapped pages */ /* Add to the list of mapped pages */
...@@ -939,6 +951,7 @@ buf_LRU_block_remove_hashed_page( ...@@ -939,6 +951,7 @@ buf_LRU_block_remove_hashed_page(
buf_page_address_fold(block->space, block->offset), buf_page_address_fold(block->space, block->offset),
block); block);
UNIV_MEM_INVALID(block->frame, UNIV_PAGE_SIZE);
block->state = BUF_BLOCK_REMOVE_HASH; block->state = BUF_BLOCK_REMOVE_HASH;
} }
......
...@@ -2829,7 +2829,7 @@ will be able to insert new data to the database without running out the ...@@ -2829,7 +2829,7 @@ will be able to insert new data to the database without running out the
tablespace. Only free extents are taken into account and we also subtract tablespace. Only free extents are taken into account and we also subtract
the safety margin required by the above function fsp_reserve_free_extents. */ the safety margin required by the above function fsp_reserve_free_extents. */
ulint ullint
fsp_get_available_space_in_free_extents( fsp_get_available_space_in_free_extents(
/*====================================*/ /*====================================*/
/* out: available space in kB */ /* out: available space in kB */
...@@ -2895,7 +2895,8 @@ fsp_get_available_space_in_free_extents( ...@@ -2895,7 +2895,8 @@ fsp_get_available_space_in_free_extents(
return(0); return(0);
} }
return(((n_free - reserve) * FSP_EXTENT_SIZE) return((ullint)(n_free - reserve)
* FSP_EXTENT_SIZE
* (UNIV_PAGE_SIZE / 1024)); * (UNIV_PAGE_SIZE / 1024));
} }
......
...@@ -4533,17 +4533,16 @@ ha_innobase::position( ...@@ -4533,17 +4533,16 @@ ha_innobase::position(
/********************************************************************* /*********************************************************************
If it's a DB_TOO_BIG_RECORD error then set a suitable message to If it's a DB_TOO_BIG_RECORD error then set a suitable message to
return to the client.*/ return to the client.*/
static inline
void void
innodb_check_for_record_too_big_error( innodb_check_for_record_too_big_error(
/*==================================*/ /*==================================*/
dict_table_t* table, /* in: table to check */ ulint comp, /* in: ROW_FORMAT: nonzero=COMPACT, 0=REDUNDANT */
int error) /* in: error code to check */ int error) /* in: error code to check */
{ {
if (error == (int)DB_TOO_BIG_RECORD) { if (error == (int)DB_TOO_BIG_RECORD) {
ulint max_row_size; ulint max_row_size
= page_get_free_space_of_empty_noninline(comp) / 2;
max_row_size = page_get_free_space_of_empty_noninline(table);
my_error(ER_TOO_BIG_ROWSIZE, MYF(0), max_row_size); my_error(ER_TOO_BIG_ROWSIZE, MYF(0), max_row_size);
} }
...@@ -4657,9 +4656,7 @@ create_table_def( ...@@ -4657,9 +4656,7 @@ create_table_def(
error = row_create_table_for_mysql(table, trx); error = row_create_table_for_mysql(table, trx);
/* We need access to the table and so we do the error checking innodb_check_for_record_too_big_error(flags & DICT_TF_COMPACT, error);
and set the error message here, before the error translation.*/
innodb_check_for_record_too_big_error(table, error);
error = convert_error_code_to_mysql(error, NULL); error = convert_error_code_to_mysql(error, NULL);
...@@ -4783,9 +4780,8 @@ create_index( ...@@ -4783,9 +4780,8 @@ create_index(
sure we don't create too long indexes. */ sure we don't create too long indexes. */
error = row_create_index_for_mysql(index, trx, field_lengths); error = row_create_index_for_mysql(index, trx, field_lengths);
/* We need access to the table and so we do the error checking innodb_check_for_record_too_big_error(form->s->row_type
and set the error message here, before the error translation.*/ != ROW_TYPE_REDUNDANT, error);
innodb_check_for_record_too_big_error(index->table, error);
error = convert_error_code_to_mysql(error, NULL); error = convert_error_code_to_mysql(error, NULL);
...@@ -4802,6 +4798,8 @@ int ...@@ -4802,6 +4798,8 @@ int
create_clustered_index_when_no_primary( create_clustered_index_when_no_primary(
/*===================================*/ /*===================================*/
trx_t* trx, /* in: InnoDB transaction handle */ trx_t* trx, /* in: InnoDB transaction handle */
ulint comp, /* in: ROW_FORMAT:
nonzero=COMPACT, 0=REDUNDANT */
const char* table_name) /* in: table name */ const char* table_name) /* in: table name */
{ {
dict_index_t* index; dict_index_t* index;
...@@ -4810,13 +4808,11 @@ create_clustered_index_when_no_primary( ...@@ -4810,13 +4808,11 @@ create_clustered_index_when_no_primary(
/* We pass 0 as the space id, and determine at a lower level the space /* We pass 0 as the space id, and determine at a lower level the space
id where to store the table */ id where to store the table */
index = dict_mem_index_create((char*) table_name, index = dict_mem_index_create(table_name, "GEN_CLUST_INDEX",
(char*) "GEN_CLUST_INDEX", 0, DICT_CLUSTERED, 0); 0, DICT_CLUSTERED, 0);
error = row_create_index_for_mysql(index, trx, NULL); error = row_create_index_for_mysql(index, trx, NULL);
/* We need access to the table and so we do the error checking innodb_check_for_record_too_big_error(comp, error);
and set the error message here, before the error translation.*/
innodb_check_for_record_too_big_error(index->table, error);
error = convert_error_code_to_mysql(error, NULL); error = convert_error_code_to_mysql(error, NULL);
...@@ -4947,7 +4943,8 @@ ha_innobase::create( ...@@ -4947,7 +4943,8 @@ ha_innobase::create(
order the rows by their row id which is internally generated order the rows by their row id which is internally generated
by InnoDB */ by InnoDB */
error = create_clustered_index_when_no_primary(trx, error = create_clustered_index_when_no_primary(
trx, form->s->row_type != ROW_TYPE_REDUNDANT,
norm_name); norm_name);
if (error) { if (error) {
goto cleanup; goto cleanup;
...@@ -5873,8 +5870,8 @@ ha_innobase::update_table_comment( ...@@ -5873,8 +5870,8 @@ ha_innobase::update_table_comment(
mutex_enter_noninline(&srv_dict_tmpfile_mutex); mutex_enter_noninline(&srv_dict_tmpfile_mutex);
rewind(srv_dict_tmpfile); rewind(srv_dict_tmpfile);
fprintf(srv_dict_tmpfile, "InnoDB free: %lu kB", fprintf(srv_dict_tmpfile, "InnoDB free: %llu kB",
(ulong) fsp_get_available_space_in_free_extents( fsp_get_available_space_in_free_extents(
prebuilt->table->space)); prebuilt->table->space));
dict_print_info_on_foreign_keys(FALSE, srv_dict_tmpfile, dict_print_info_on_foreign_keys(FALSE, srv_dict_tmpfile,
......
...@@ -28,7 +28,7 @@ buf_block_peek_if_too_old( ...@@ -28,7 +28,7 @@ buf_block_peek_if_too_old(
buf_block_t* block) /* in: block to make younger */ buf_block_t* block) /* in: block to make younger */
{ {
return(buf_pool->freed_page_clock >= block->freed_page_clock return(buf_pool->freed_page_clock >= block->freed_page_clock
+ 1 + (buf_pool->curr_size / 1024)); + 1 + (buf_pool->curr_size / 4));
} }
/************************************************************************* /*************************************************************************
......
...@@ -245,7 +245,7 @@ will be able to insert new data to the database without running out the ...@@ -245,7 +245,7 @@ will be able to insert new data to the database without running out the
tablespace. Only free extents are taken into account and we also subtract tablespace. Only free extents are taken into account and we also subtract
the safety margin required by the above function fsp_reserve_free_extents. */ the safety margin required by the above function fsp_reserve_free_extents. */
ulint ullint
fsp_get_available_space_in_free_extents( fsp_get_available_space_in_free_extents(
/*====================================*/ /*====================================*/
/* out: available space in kB */ /* out: available space in kB */
......
...@@ -519,6 +519,18 @@ lock_is_table_exclusive( ...@@ -519,6 +519,18 @@ lock_is_table_exclusive(
dict_table_t* table, /* in: table */ dict_table_t* table, /* in: table */
trx_t* trx); /* in: transaction */ trx_t* trx); /* in: transaction */
/************************************************************************* /*************************************************************************
Checks if a lock request lock1 has to wait for request lock2. */
ibool
lock_has_to_wait(
/*=============*/
/* out: TRUE if lock1 has to wait for lock2 to be
removed */
lock_t* lock1, /* in: waiting lock */
lock_t* lock2); /* in: another lock; NOTE that it is assumed that this
has a lock bit set on the same record as in lock1 if
the locks are record locks */
/*************************************************************************
Checks that a transaction id is sensible, i.e., not in the future. */ Checks that a transaction id is sensible, i.e., not in the future. */
ibool ibool
...@@ -597,7 +609,7 @@ lock_validate(void); ...@@ -597,7 +609,7 @@ lock_validate(void);
/* out: TRUE if ok */ /* out: TRUE if ok */
/************************************************************************* /*************************************************************************
Return approximate number or record locks (bits set in the bitmap) for Return approximate number or record locks (bits set in the bitmap) for
this transaction. Since delete-marked records ma ybe removed, the this transaction. Since delete-marked records maybe removed, the
record count will not be precise. */ record count will not be precise. */
ulint ulint
......
...@@ -167,6 +167,8 @@ mem_heap_alloc( ...@@ -167,6 +167,8 @@ mem_heap_alloc(
mem_block_set_free(block, free + MEM_SPACE_NEEDED(n)); mem_block_set_free(block, free + MEM_SPACE_NEEDED(n));
#ifdef UNIV_MEM_DEBUG #ifdef UNIV_MEM_DEBUG
UNIV_MEM_ALLOC(buf,
n + MEM_FIELD_HEADER_SIZE + MEM_FIELD_TRAILER_SIZE);
/* In the debug version write debugging info to the field */ /* In the debug version write debugging info to the field */
mem_field_init((byte*)buf, n); mem_field_init((byte*)buf, n);
...@@ -177,8 +179,10 @@ mem_heap_alloc( ...@@ -177,8 +179,10 @@ mem_heap_alloc(
#endif #endif
#ifdef UNIV_SET_MEM_TO_ZERO #ifdef UNIV_SET_MEM_TO_ZERO
UNIV_MEM_ALLOC(buf, n);
memset(buf, '\0', n); memset(buf, '\0', n);
#endif #endif
UNIV_MEM_ALLOC(buf, n);
return(buf); return(buf);
} }
...@@ -369,6 +373,8 @@ mem_heap_free_top( ...@@ -369,6 +373,8 @@ mem_heap_free_top(
if ((heap != block) && (mem_block_get_free(block) if ((heap != block) && (mem_block_get_free(block)
== mem_block_get_start(block))) { == mem_block_get_start(block))) {
mem_heap_block_free(heap, block); mem_heap_block_free(heap, block);
} else {
UNIV_MEM_FREE((byte*) block + mem_block_get_free(block), n);
} }
} }
......
...@@ -531,6 +531,15 @@ page_get_free_space_of_empty( ...@@ -531,6 +531,15 @@ page_get_free_space_of_empty(
/* out: free space */ /* out: free space */
ulint comp) /* in: nonzero=compact page format */ ulint comp) /* in: nonzero=compact page format */
__attribute__((const)); __attribute__((const));
/*****************************************************************
Calculates free space if a page is emptied. */
ulint
page_get_free_space_of_empty_noninline(
/*===================================*/
/* out: free space */
ulint comp) /* in: nonzero=compact page format */
__attribute__((const));
/**************************************************************** /****************************************************************
Returns the sum of the sizes of the records in the record list Returns the sum of the sizes of the records in the record list
excluding the infimum and supremum records. */ excluding the infimum and supremum records. */
......
...@@ -460,19 +460,6 @@ row_check_table_for_mysql( ...@@ -460,19 +460,6 @@ row_check_table_for_mysql(
/* out: DB_ERROR or DB_SUCCESS */ /* out: DB_ERROR or DB_SUCCESS */
row_prebuilt_t* prebuilt); /* in: prebuilt struct in MySQL row_prebuilt_t* prebuilt); /* in: prebuilt struct in MySQL
handle */ handle */
/*************************************************************************
Get the min of the maximum possible row sizes. */
ulint
page_get_free_space_of_empty_noninline(
/*===================================*/
/* out: The (approx) maximum size
of a row, this is a conservative
estimate, since the size can be
slightly larger depending upon
the ROW_FORMAT setting.*/
dict_table_t* table); /* in: table for which max record
size required.*/
/* A struct describing a place for an individual column in the MySQL /* A struct describing a place for an individual column in the MySQL
row format which is presented to the table handler in ha_innobase. row format which is presented to the table handler in ha_innobase.
......
...@@ -83,6 +83,8 @@ memory is read outside the allocated blocks. */ ...@@ -83,6 +83,8 @@ memory is read outside the allocated blocks. */
/* Make a non-inline debug version */ /* Make a non-inline debug version */
#if 0 #if 0
#define UNIV_DEBUG_VALGRIND /* Enable extra
Valgrind instrumentation */
#define UNIV_DEBUG /* Enable ut_ad() assertions */ #define UNIV_DEBUG /* Enable ut_ad() assertions */
#define UNIV_LIST_DEBUG /* debug UT_LIST_ macros */ #define UNIV_LIST_DEBUG /* debug UT_LIST_ macros */
#define UNIV_MEM_DEBUG /* detect memory leaks etc */ #define UNIV_MEM_DEBUG /* detect memory leaks etc */
...@@ -214,6 +216,8 @@ typedef __int64 ib_longlong; ...@@ -214,6 +216,8 @@ typedef __int64 ib_longlong;
typedef longlong ib_longlong; typedef longlong ib_longlong;
#endif #endif
/* Unsigned integer type that is at least 64 bits wide, for quantities
that may not fit in ulint (which has the size of a pointer) */
typedef unsigned long long int ullint;
#ifndef __WIN__ #ifndef __WIN__
#if SIZEOF_LONG != SIZEOF_VOIDP #if SIZEOF_LONG != SIZEOF_VOIDP
#error "Error: InnoDB's ulint must be of the same size as void*" #error "Error: InnoDB's ulint must be of the same size as void*"
...@@ -298,5 +302,17 @@ typedef void* os_thread_ret_t; ...@@ -298,5 +302,17 @@ typedef void* os_thread_ret_t;
#include "ut0dbg.h" #include "ut0dbg.h"
#include "ut0ut.h" #include "ut0ut.h"
#include "db0err.h" #include "db0err.h"
/* Memory-state annotation macros. When UNIV_DEBUG_VALGRIND is defined
they map to Valgrind Memcheck client requests; otherwise they expand to
empty statements. */
#ifdef UNIV_DEBUG_VALGRIND
# include <valgrind/memcheck.h>
/* Mark the range as addressable and initialized */
# define UNIV_MEM_VALID(addr, size) VALGRIND_MAKE_MEM_DEFINED(addr, size)
/* Mark the range as addressable but uninitialized */
# define UNIV_MEM_INVALID(addr, size) VALGRIND_MAKE_MEM_UNDEFINED(addr, size)
/* Mark the range as not addressable at all */
# define UNIV_MEM_FREE(addr, size) VALGRIND_MAKE_MEM_NOACCESS(addr, size)
/* Mark the range as addressable but uninitialized; issues the same
client request as UNIV_MEM_INVALID */
# define UNIV_MEM_ALLOC(addr, size) VALGRIND_MAKE_MEM_UNDEFINED(addr, size)
#else
/* No Valgrind instrumentation: all four macros are no-ops */
# define UNIV_MEM_VALID(addr, size) do {} while(0)
# define UNIV_MEM_INVALID(addr, size) do {} while(0)
# define UNIV_MEM_FREE(addr, size) do {} while(0)
# define UNIV_MEM_ALLOC(addr, size) do {} while(0)
#endif
#endif #endif
...@@ -6,10 +6,14 @@ The transaction lock system ...@@ -6,10 +6,14 @@ The transaction lock system
Created 5/7/1996 Heikki Tuuri Created 5/7/1996 Heikki Tuuri
*******************************************************/ *******************************************************/
#define LOCK_MODULE_IMPLEMENTATION
#include "lock0lock.h" #include "lock0lock.h"
#include "lock0priv.h"
#ifdef UNIV_NONINL #ifdef UNIV_NONINL
#include "lock0lock.ic" #include "lock0lock.ic"
#include "lock0priv.ic"
#endif #endif
#include "usr0sess.h" #include "usr0sess.h"
...@@ -319,42 +323,6 @@ ibool lock_print_waits = FALSE; ...@@ -319,42 +323,6 @@ ibool lock_print_waits = FALSE;
/* The lock system */ /* The lock system */
lock_sys_t* lock_sys = NULL; lock_sys_t* lock_sys = NULL;
/* A table lock: the table-specific part of a lock, embedded in
lock_struct as un_member.tab_lock */
typedef struct lock_table_struct lock_table_t;
struct lock_table_struct{
dict_table_t* table; /* database table in dictionary cache */
UT_LIST_NODE_T(lock_t)
locks; /* list node linking the locks on the same
table */
};
/* Record lock for a page: the record-specific part of a lock, embedded
in lock_struct as un_member.rec_lock. The page is identified by
(space, page_no). */
typedef struct lock_rec_struct lock_rec_t;
struct lock_rec_struct{
ulint space; /* space id */
ulint page_no; /* page number */
ulint n_bits; /* number of bits in the lock bitmap */
/* NOTE: the lock bitmap is not a member of this
struct; it is placed immediately after the lock
struct in memory */
};
/* Lock struct: common header of both table locks and record locks,
followed by a union holding the type-specific part */
struct lock_struct{
trx_t* trx; /* transaction owning the lock */
UT_LIST_NODE_T(lock_t)
trx_locks; /* list of the locks of the
transaction */
ulint type_mode; /* lock type, mode, LOCK_GAP or
LOCK_REC_NOT_GAP,
LOCK_INSERT_INTENTION,
wait flag, ORed */
hash_node_t hash; /* hash chain node for a record lock */
dict_index_t* index; /* index for a record lock */
/* Type-specific part. NOTE(review): presumably the lock type
stored in type_mode selects which member is valid -- confirm */
union {
lock_table_t tab_lock;/* table lock */
lock_rec_t rec_lock;/* record lock */
} un_member;
};
/* We store info on the latest deadlock error to this buffer. InnoDB /* We store info on the latest deadlock error to this buffer. InnoDB
Monitor will then fetch it and print */ Monitor will then fetch it and print */
ibool lock_deadlock_found = FALSE; ibool lock_deadlock_found = FALSE;
...@@ -400,20 +368,6 @@ lock_deadlock_recursive( ...@@ -400,20 +368,6 @@ lock_deadlock_recursive(
LOCK_MAX_DEPTH_IN_DEADLOCK_CHECK, we LOCK_MAX_DEPTH_IN_DEADLOCK_CHECK, we
return LOCK_VICTIM_IS_START */ return LOCK_VICTIM_IS_START */
/*************************************************************************
Extracts the lock type bits from the type_mode field of a lock. */
UNIV_INLINE
ulint
lock_get_type(
/*==========*/
			/* out: LOCK_TABLE or LOCK_REC */
	lock_t*	lock)	/* in: lock */
{
	ulint	lock_type;

	ut_ad(lock);

	/* Keep only the bits selected by LOCK_TYPE_MASK */
	lock_type = lock->type_mode & LOCK_TYPE_MASK;

	return(lock_type);
}
/************************************************************************* /*************************************************************************
Gets the nth bit of a record lock. */ Gets the nth bit of a record lock. */
UNIV_INLINE UNIV_INLINE
...@@ -612,7 +566,7 @@ ulint ...@@ -612,7 +566,7 @@ ulint
lock_get_mode( lock_get_mode(
/*==========*/ /*==========*/
/* out: mode */ /* out: mode */
lock_t* lock) /* in: lock */ const lock_t* lock) /* in: lock */
{ {
ut_ad(lock); ut_ad(lock);
...@@ -1017,7 +971,7 @@ lock_rec_has_to_wait( ...@@ -1017,7 +971,7 @@ lock_rec_has_to_wait(
/************************************************************************* /*************************************************************************
Checks if a lock request lock1 has to wait for request lock2. */ Checks if a lock request lock1 has to wait for request lock2. */
static
ibool ibool
lock_has_to_wait( lock_has_to_wait(
/*=============*/ /*=============*/
...@@ -1098,7 +1052,7 @@ lock_rec_set_nth_bit( ...@@ -1098,7 +1052,7 @@ lock_rec_set_nth_bit(
/************************************************************************** /**************************************************************************
Looks for a set bit in a record lock bitmap. Returns ULINT_UNDEFINED, Looks for a set bit in a record lock bitmap. Returns ULINT_UNDEFINED,
if none found. */ if none found. */
static
ulint ulint
lock_rec_find_set_bit( lock_rec_find_set_bit(
/*==================*/ /*==================*/
...@@ -1390,7 +1344,7 @@ lock_rec_copy( ...@@ -1390,7 +1344,7 @@ lock_rec_copy(
/************************************************************************* /*************************************************************************
Gets the previous record lock set on a record. */ Gets the previous record lock set on a record. */
static
lock_t* lock_t*
lock_rec_get_prev( lock_rec_get_prev(
/*==============*/ /*==============*/
......
...@@ -57,6 +57,16 @@ ibool recv_needed_recovery = FALSE; ...@@ -57,6 +57,16 @@ ibool recv_needed_recovery = FALSE;
ibool recv_lsn_checks_on = FALSE; ibool recv_lsn_checks_on = FALSE;
/* The logs are scanned in two situations: at normal startup, and when
recovering from an archive.
This flag is set while we are scanning from the last checkpoint during
startup. If that scan finds log entries that were written after the last
checkpoint, we know that the server was not shut down cleanly. In that
case the crash recovery environment must be initialized before we attempt
to store these entries in the log hash table. */
ibool recv_log_scan_is_startup_type = FALSE;
/* If the following is TRUE, the buffer pool file pages must be invalidated /* If the following is TRUE, the buffer pool file pages must be invalidated
after recovery and no ibuf operations are allowed; this becomes TRUE if after recovery and no ibuf operations are allowed; this becomes TRUE if
the log record hash table becomes too full, and log records must be merged the log record hash table becomes too full, and log records must be merged
...@@ -99,6 +109,16 @@ the recovery failed and the database may be corrupt. */ ...@@ -99,6 +109,16 @@ the recovery failed and the database may be corrupt. */
dulint recv_max_page_lsn; dulint recv_max_page_lsn;
/* prototypes */
/***********************************************************
Initialize crash recovery environment. Can be called iff
recv_needed_recovery == FALSE. */
static
void
recv_init_crash_recovery(void);
/*===========================*/
/************************************************************ /************************************************************
Creates the recovery system. */ Creates the recovery system. */
...@@ -2284,6 +2304,23 @@ recv_scan_log_recs( ...@@ -2284,6 +2304,23 @@ recv_scan_log_recs(
if (ut_dulint_cmp(scanned_lsn, recv_sys->scanned_lsn) > 0) { if (ut_dulint_cmp(scanned_lsn, recv_sys->scanned_lsn) > 0) {
/* We have found more entries. If this scan is
of startup type, we must initiate crash recovery
environment before parsing these log records. */
if (recv_log_scan_is_startup_type
&& !recv_needed_recovery) {
fprintf(stderr,
"InnoDB: Log scan progressed"
" past the checkpoint lsn %lu %lu\n",
(ulong) ut_dulint_get_high(
recv_sys->scanned_lsn),
(ulong) ut_dulint_get_low(
recv_sys->scanned_lsn));
recv_init_crash_recovery();
}
/* We were able to find more log data: add it to the /* We were able to find more log data: add it to the
parsing buffer if parse_start_lsn is already parsing buffer if parse_start_lsn is already
non-zero */ non-zero */
...@@ -2405,6 +2442,48 @@ recv_group_scan_log_recs( ...@@ -2405,6 +2442,48 @@ recv_group_scan_log_recs(
#endif /* UNIV_DEBUG */ #endif /* UNIV_DEBUG */
} }
/***********************************************************
Puts the server into crash recovery mode: sets recv_needed_recovery,
reports the unclean shutdown on stderr, loads the single-table
tablespaces and, when srv_force_recovery is below SRV_FORCE_NO_LOG_REDO,
restores possible half-written pages from the doublewrite buffer.
May only be called while recv_needed_recovery is still FALSE. */
static
void
recv_init_crash_recovery(void)
/*==========================*/
{
	/* Crash recovery must not be initialized twice */
	ut_a(!recv_needed_recovery);

	recv_needed_recovery = TRUE;

	ut_print_timestamp(stderr);

	fprintf(stderr,
		" InnoDB: Database was not"
		" shut down normally!\n"
		"InnoDB: Starting crash recovery.\n");

	fprintf(stderr,
		"InnoDB: Reading tablespace information"
		" from the .ibd files...\n");

	fil_load_single_table_tablespaces();

	if (srv_force_recovery >= SRV_FORCE_NO_LOG_REDO) {
		/* The doublewrite restore below is only done for
		lower srv_force_recovery settings */

		return;
	}

	/* We are using the doublewrite method: check whether there are
	half-written pages in the data files and restore them from the
	doublewrite buffer if possible */

	fprintf(stderr,
		"InnoDB: Restoring possible"
		" half-written data pages from"
		" the doublewrite\n"
		"InnoDB: buffer...\n");

	trx_sys_doublewrite_init_or_restore_pages(TRUE);
}
/************************************************************ /************************************************************
Recovers from a checkpoint. When this function returns, the database is able Recovers from a checkpoint. When this function returns, the database is able
to start processing of new user transactions, but the function to start processing of new user transactions, but the function
...@@ -2532,92 +2611,6 @@ recv_recovery_from_checkpoint_start( ...@@ -2532,92 +2611,6 @@ recv_recovery_from_checkpoint_start(
recv_sys->recovered_lsn = checkpoint_lsn; recv_sys->recovered_lsn = checkpoint_lsn;
srv_start_lsn = checkpoint_lsn; srv_start_lsn = checkpoint_lsn;
/* NOTE: we always do a 'recovery' at startup, but only if
there is something wrong we will print a message to the
user about recovery: */
if (ut_dulint_cmp(checkpoint_lsn, max_flushed_lsn) != 0
|| ut_dulint_cmp(checkpoint_lsn, min_flushed_lsn) != 0) {
if (ut_dulint_cmp(checkpoint_lsn, max_flushed_lsn)
< 0) {
fprintf(stderr,
"InnoDB: #########################"
"#################################\n"
"InnoDB: "
"WARNING!\n"
"InnoDB: The log sequence number"
" in ibdata files is higher\n"
"InnoDB: than the log sequence number"
" in the ib_logfiles! Are you sure\n"
"InnoDB: you are using the right"
" ib_logfiles to start up"
" the database?\n"
"InnoDB: Log sequence number in"
" ib_logfiles is %lu %lu, log\n"
"InnoDB: sequence numbers stamped"
" to ibdata file headers are between\n"
"InnoDB: %lu %lu and %lu %lu.\n"
"InnoDB: #########################"
"#################################\n",
(ulong) ut_dulint_get_high(
checkpoint_lsn),
(ulong) ut_dulint_get_low(
checkpoint_lsn),
(ulong) ut_dulint_get_high(
min_flushed_lsn),
(ulong) ut_dulint_get_low(
min_flushed_lsn),
(ulong) ut_dulint_get_high(
max_flushed_lsn),
(ulong) ut_dulint_get_low(
max_flushed_lsn));
}
recv_needed_recovery = TRUE;
ut_print_timestamp(stderr);
fprintf(stderr,
" InnoDB: Database was not"
" shut down normally!\n"
"InnoDB: Starting crash recovery.\n");
fprintf(stderr,
"InnoDB: Reading tablespace information"
" from the .ibd files...\n");
fil_load_single_table_tablespaces();
/* If we are using the doublewrite method, we will
check if there are half-written pages in data files,
and restore them from the doublewrite buffer if
possible */
if (srv_force_recovery < SRV_FORCE_NO_LOG_REDO) {
fprintf(stderr,
"InnoDB: Restoring possible"
" half-written data pages from"
" the doublewrite\n"
"InnoDB: buffer...\n");
trx_sys_doublewrite_init_or_restore_pages(
TRUE);
}
ut_print_timestamp(stderr);
fprintf(stderr,
" InnoDB: Starting log scan"
" based on checkpoint at\n"
"InnoDB: log sequence number %lu %lu.\n",
(ulong) ut_dulint_get_high(checkpoint_lsn),
(ulong) ut_dulint_get_low(checkpoint_lsn));
} else {
/* Init the doublewrite buffer memory structure */
trx_sys_doublewrite_init_or_restore_pages(FALSE);
}
} }
contiguous_lsn = ut_dulint_align_down(recv_sys->scanned_lsn, contiguous_lsn = ut_dulint_align_down(recv_sys->scanned_lsn,
...@@ -2670,6 +2663,8 @@ recv_recovery_from_checkpoint_start( ...@@ -2670,6 +2663,8 @@ recv_recovery_from_checkpoint_start(
group = UT_LIST_GET_NEXT(log_groups, group); group = UT_LIST_GET_NEXT(log_groups, group);
} }
/* Set the flag to publish that we are doing startup scan. */
recv_log_scan_is_startup_type = (type == LOG_CHECKPOINT);
while (group) { while (group) {
old_scanned_lsn = recv_sys->scanned_lsn; old_scanned_lsn = recv_sys->scanned_lsn;
...@@ -2691,6 +2686,69 @@ recv_recovery_from_checkpoint_start( ...@@ -2691,6 +2686,69 @@ recv_recovery_from_checkpoint_start(
group = UT_LIST_GET_NEXT(log_groups, group); group = UT_LIST_GET_NEXT(log_groups, group);
} }
/* Done with startup scan. Clear the flag. */
recv_log_scan_is_startup_type = FALSE;
if (type == LOG_CHECKPOINT) {
/* NOTE: we always do a 'recovery' at startup, but only if
there is something wrong we will print a message to the
user about recovery: */
if (ut_dulint_cmp(checkpoint_lsn, max_flushed_lsn) != 0
|| ut_dulint_cmp(checkpoint_lsn, min_flushed_lsn) != 0) {
if (ut_dulint_cmp(checkpoint_lsn, max_flushed_lsn)
< 0) {
fprintf(stderr,
"InnoDB: #########################"
"#################################\n"
"InnoDB: "
"WARNING!\n"
"InnoDB: The log sequence number"
" in ibdata files is higher\n"
"InnoDB: than the log sequence number"
" in the ib_logfiles! Are you sure\n"
"InnoDB: you are using the right"
" ib_logfiles to start up"
" the database?\n"
"InnoDB: Log sequence number in"
" ib_logfiles is %lu %lu, log\n"
"InnoDB: sequence numbers stamped"
" to ibdata file headers are between\n"
"InnoDB: %lu %lu and %lu %lu.\n"
"InnoDB: #########################"
"#################################\n",
(ulong) ut_dulint_get_high(
checkpoint_lsn),
(ulong) ut_dulint_get_low(
checkpoint_lsn),
(ulong) ut_dulint_get_high(
min_flushed_lsn),
(ulong) ut_dulint_get_low(
min_flushed_lsn),
(ulong) ut_dulint_get_high(
max_flushed_lsn),
(ulong) ut_dulint_get_low(
max_flushed_lsn));
}
if (!recv_needed_recovery) {
fprintf(stderr,
"InnoDB: The log sequence number"
" in ibdata files does not match\n"
"InnoDB: the log sequence number"
" in the ib_logfiles!\n");
recv_init_crash_recovery();
}
}
if (!recv_needed_recovery) {
/* Init the doublewrite buffer memory structure */
trx_sys_doublewrite_init_or_restore_pages(FALSE);
}
}
/* We currently have only one log group */ /* We currently have only one log group */
if (ut_dulint_cmp(group_scanned_lsn, checkpoint_lsn) < 0) { if (ut_dulint_cmp(group_scanned_lsn, checkpoint_lsn) < 0) {
ut_print_timestamp(stderr); ut_print_timestamp(stderr);
...@@ -2747,20 +2805,9 @@ recv_recovery_from_checkpoint_start( ...@@ -2747,20 +2805,9 @@ recv_recovery_from_checkpoint_start(
recv_synchronize_groups(up_to_date_group); recv_synchronize_groups(up_to_date_group);
if (!recv_needed_recovery) { if (!recv_needed_recovery) {
if (ut_dulint_cmp(checkpoint_lsn, recv_sys->recovered_lsn) ut_a(ut_dulint_cmp(checkpoint_lsn,
!= 0) { recv_sys->recovered_lsn) == 0);
fprintf(stderr,
"InnoDB: Warning: we did not need to do"
" crash recovery, but log scan\n"
"InnoDB: progressed past the checkpoint"
" lsn %lu %lu up to lsn %lu %lu\n",
(ulong) ut_dulint_get_high(checkpoint_lsn),
(ulong) ut_dulint_get_low(checkpoint_lsn),
(ulong) ut_dulint_get_high(
recv_sys->recovered_lsn),
(ulong) ut_dulint_get_low(
recv_sys->recovered_lsn));
}
} else { } else {
srv_start_lsn = recv_sys->recovered_lsn; srv_start_lsn = recv_sys->recovered_lsn;
} }
......
...@@ -514,6 +514,7 @@ mem_heap_block_free( ...@@ -514,6 +514,7 @@ mem_heap_block_free(
mem_erase_buf((byte*)block, len); mem_erase_buf((byte*)block, len);
#endif #endif
UNIV_MEM_FREE(block, len);
if (init_block) { if (init_block) {
/* Do not have to free: do nothing */ /* Do not have to free: do nothing */
......
...@@ -229,6 +229,8 @@ mem_pool_create( ...@@ -229,6 +229,8 @@ mem_pool_create(
mem_area_set_size(area, ut_2_exp(i)); mem_area_set_size(area, ut_2_exp(i));
mem_area_set_free(area, TRUE); mem_area_set_free(area, TRUE);
UNIV_MEM_FREE(MEM_AREA_EXTRA_SIZE + (byte*) area,
ut_2_exp(i) - MEM_AREA_EXTRA_SIZE);
UT_LIST_ADD_FIRST(free_list, pool->free_list[i], area); UT_LIST_ADD_FIRST(free_list, pool->free_list[i], area);
...@@ -300,6 +302,7 @@ mem_pool_fill_free_list( ...@@ -300,6 +302,7 @@ mem_pool_fill_free_list(
UT_LIST_REMOVE(free_list, pool->free_list[i + 1], area); UT_LIST_REMOVE(free_list, pool->free_list[i + 1], area);
area2 = (mem_area_t*)(((byte*)area) + ut_2_exp(i)); area2 = (mem_area_t*)(((byte*)area) + ut_2_exp(i));
UNIV_MEM_ALLOC(area2, MEM_AREA_EXTRA_SIZE);
mem_area_set_size(area2, ut_2_exp(i)); mem_area_set_size(area2, ut_2_exp(i));
mem_area_set_free(area2, TRUE); mem_area_set_free(area2, TRUE);
...@@ -400,6 +403,8 @@ mem_area_alloc( ...@@ -400,6 +403,8 @@ mem_area_alloc(
mutex_exit(&(pool->mutex)); mutex_exit(&(pool->mutex));
ut_ad(mem_pool_validate(pool)); ut_ad(mem_pool_validate(pool));
UNIV_MEM_ALLOC(MEM_AREA_EXTRA_SIZE + (byte*)area,
ut_2_exp(n) - MEM_AREA_EXTRA_SIZE);
return((void*)(MEM_AREA_EXTRA_SIZE + ((byte*)area))); return((void*)(MEM_AREA_EXTRA_SIZE + ((byte*)area)));
} }
...@@ -482,6 +487,7 @@ mem_area_free( ...@@ -482,6 +487,7 @@ mem_area_free(
} }
size = mem_area_get_size(area); size = mem_area_get_size(area);
UNIV_MEM_FREE(ptr, size - MEM_AREA_EXTRA_SIZE);
if (size == 0) { if (size == 0) {
fprintf(stderr, fprintf(stderr,
......
...@@ -456,10 +456,9 @@ os_file_handle_error_no_exit( ...@@ -456,10 +456,9 @@ os_file_handle_error_no_exit(
#undef USE_FILE_LOCK #undef USE_FILE_LOCK
#define USE_FILE_LOCK #define USE_FILE_LOCK
#if defined(UNIV_HOTBACKUP) || defined(__WIN__) || defined(__FreeBSD__) || defined(__NETWARE__) #if defined(UNIV_HOTBACKUP) || defined(__WIN__) || defined(__NETWARE__)
/* InnoDB Hot Backup does not lock the data files. /* InnoDB Hot Backup does not lock the data files.
* On Windows, mandatory locking is used. * On Windows, mandatory locking is used.
* On FreeBSD with LinuxThreads, advisory locking does not work properly.
*/ */
# undef USE_FILE_LOCK # undef USE_FILE_LOCK
#endif #endif
......
...@@ -209,6 +209,18 @@ page_set_max_trx_id( ...@@ -209,6 +209,18 @@ page_set_max_trx_id(
} }
} }
/*****************************************************************
Returns the free space of a page if the page is emptied. This function
only forwards its argument to page_get_free_space_of_empty(). */
ulint
page_get_free_space_of_empty_noninline(
/*===================================*/
			/* out: free space */
	ulint	comp)	/* in: nonzero=compact page format */
{
	ulint	free_space;

	free_space = page_get_free_space_of_empty(comp);

	return(free_space);
}
/**************************************************************** /****************************************************************
Allocates a block of memory from an index page. */ Allocates a block of memory from an index page. */
......
...@@ -753,7 +753,11 @@ rec_convert_dtuple_to_rec_old( ...@@ -753,7 +753,11 @@ rec_convert_dtuple_to_rec_old(
/* Calculate the offset of the origin in the physical record */ /* Calculate the offset of the origin in the physical record */
rec = buf + rec_get_converted_extra_size(data_size, n_fields); rec = buf + rec_get_converted_extra_size(data_size, n_fields);
#ifdef UNIV_DEBUG
/* Suppress Valgrind warnings of ut_ad()
in mach_write_to_1(), mach_write_to_2() et al. */
memset(buf, 0xff, rec - buf + data_size);
#endif /* UNIV_DEBUG */
/* Store the number of fields */ /* Store the number of fields */
rec_set_n_fields_old(rec, n_fields); rec_set_n_fields_old(rec, n_fields);
......
...@@ -4059,25 +4059,3 @@ row_check_table_for_mysql( ...@@ -4059,25 +4059,3 @@ row_check_table_for_mysql(
return(ret); return(ret);
} }
/*************************************************************************
Estimates the maximum row size of a table as one half of the free space
of an empty page in the table's (compact or non-compact) format. */
ulint
page_get_free_space_of_empty_noninline(
/*===================================*/
				/* out: the approximate maximum size
				of a row; a conservative estimate,
				because the size can be slightly
				larger depending on the ROW_FORMAT
				setting */
	dict_table_t*	table)	/* in: table for which the maximum
				record size is required */
{
	/* The compact-format flag of the table selects the page format. */
	return(page_get_free_space_of_empty(dict_table_is_comp(table)) / 2);
}
...@@ -15,16 +15,34 @@ Created 9/11/1995 Heikki Tuuri ...@@ -15,16 +15,34 @@ Created 9/11/1995 Heikki Tuuri
#include "mem0mem.h" #include "mem0mem.h"
#include "srv0srv.h" #include "srv0srv.h"
/* Number of system calls made during shared latching */
ulint rw_s_system_call_count = 0; ulint rw_s_system_call_count = 0;
/* Number of spin waits on rw-latches
that occurred during shared (read) locks */
ulint rw_s_spin_wait_count = 0; ulint rw_s_spin_wait_count = 0;
/* Number of OS waits on rw-latches
that occurred during shared (read) locks */
ulint rw_s_os_wait_count = 0; ulint rw_s_os_wait_count = 0;
/* Number of unlocks that release shared locks;
set only when UNIV_SYNC_PERF_STAT is defined */
ulint rw_s_exit_count = 0; ulint rw_s_exit_count = 0;
/* Number of system calls made during exclusive latching */
ulint rw_x_system_call_count = 0; ulint rw_x_system_call_count = 0;
/* Number of spin waits on rw-latches
that occurred during exclusive (write) locks */
ulint rw_x_spin_wait_count = 0; ulint rw_x_spin_wait_count = 0;
/* Number of OS waits on rw-latches
that occurred during exclusive (write) locks */
ulint rw_x_os_wait_count = 0; ulint rw_x_os_wait_count = 0;
/* Number of unlocks that release exclusive locks;
set only when UNIV_SYNC_PERF_STAT is defined */
ulint rw_x_exit_count = 0; ulint rw_x_exit_count = 0;
/* The global list of rw-locks */ /* The global list of rw-locks */
......
...@@ -115,6 +115,7 @@ ulint mutex_system_call_count = 0; ...@@ -115,6 +115,7 @@ ulint mutex_system_call_count = 0;
/* Number of spin waits on mutexes: for performance monitoring */ /* Number of spin waits on mutexes: for performance monitoring */
/* round=one iteration of a spin loop */
ulint mutex_spin_round_count = 0; ulint mutex_spin_round_count = 0;
ulint mutex_spin_wait_count = 0; ulint mutex_spin_wait_count = 0;
ulint mutex_os_wait_count = 0; ulint mutex_os_wait_count = 0;
......
...@@ -868,7 +868,16 @@ trx_sysf_create( ...@@ -868,7 +868,16 @@ trx_sysf_create(
trx_sysf_rseg_set_page_no(sys_header, i, FIL_NULL, mtr); trx_sysf_rseg_set_page_no(sys_header, i, FIL_NULL, mtr);
} }
/* The remaining area (up to the page trailer) is uninitialized. */ /* The remaining area (up to the page trailer) is uninitialized.
Silence Valgrind warnings about it. */
UNIV_MEM_VALID(sys_header + (TRX_SYS_RSEGS
+ TRX_SYS_N_RSEGS * TRX_SYS_RSEG_SLOT_SIZE
+ TRX_SYS_RSEG_SPACE),
(UNIV_PAGE_SIZE - FIL_PAGE_DATA_END
- (TRX_SYS_RSEGS
+ TRX_SYS_N_RSEGS * TRX_SYS_RSEG_SLOT_SIZE
+ TRX_SYS_RSEG_SPACE))
+ page - sys_header);
/* Create the first rollback segment in the SYSTEM tablespace */ /* Create the first rollback segment in the SYSTEM tablespace */
page_no = trx_rseg_header_create(TRX_SYS_SPACE, ULINT_MAX, &slot_no, page_no = trx_rseg_header_create(TRX_SYS_SPACE, ULINT_MAX, &slot_no,
......
...@@ -1570,19 +1570,21 @@ trx_commit_for_mysql( ...@@ -1570,19 +1570,21 @@ trx_commit_for_mysql(
the transaction object does not have an InnoDB session object, and we the transaction object does not have an InnoDB session object, and we
set the dummy session that we use for all MySQL transactions. */ set the dummy session that we use for all MySQL transactions. */
mutex_enter(&kernel_mutex);
if (trx->sess == NULL) { if (trx->sess == NULL) {
/* Open a dummy session */ /* Open a dummy session */
if (!trx_dummy_sess) {
mutex_enter(&kernel_mutex);
if (!trx_dummy_sess) { if (!trx_dummy_sess) {
trx_dummy_sess = sess_open(); trx_dummy_sess = sess_open();
} }
trx->sess = trx_dummy_sess; mutex_exit(&kernel_mutex);
} }
mutex_exit(&kernel_mutex); trx->sess = trx_dummy_sess;
}
trx_start_if_not_started(trx); trx_start_if_not_started(trx);
......
...@@ -162,6 +162,8 @@ retry: ...@@ -162,6 +162,8 @@ retry:
#endif #endif
} }
UNIV_MEM_ALLOC(ret, n + sizeof(ut_mem_block_t));
((ut_mem_block_t*)ret)->size = n + sizeof(ut_mem_block_t); ((ut_mem_block_t*)ret)->size = n + sizeof(ut_mem_block_t);
((ut_mem_block_t*)ret)->magic_n = UT_MEM_MAGIC_N; ((ut_mem_block_t*)ret)->magic_n = UT_MEM_MAGIC_N;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment