Many files:

  Merge InnoDB-4.0.5: new isolation levels READ COMMITTED and READ UNCOMMITTED now supported, selective deadlock resolution
mysqld.cc:
  Change MySQL default isolation level to REPEATABLE READ; note that InnoDB has always had that default, and BDB and MyISAM always run at SERIALIZABLE level anyway
parent 24521828
......@@ -274,6 +274,7 @@ btr_page_create(
ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
MTR_MEMO_PAGE_X_FIX));
page_create(page, mtr);
buf_block_align(page)->check_index_page_at_flush = TRUE;
btr_page_set_index_id(page, tree->id, mtr);
}
......@@ -713,6 +714,7 @@ btr_create(
/* Create a new index page on the allocated segment page */
page = page_create(frame, mtr);
buf_block_align(page)->check_index_page_at_flush = TRUE;
/* Set the index id of the page */
btr_page_set_index_id(page, index_id, mtr);
......@@ -847,6 +849,7 @@ btr_page_reorganize_low(
segment headers, next page-field, etc.) is preserved intact */
page_create(page, mtr);
buf_block_align(page)->check_index_page_at_flush = TRUE;
/* Copy the records from the temporary space to the recreated page;
do not copy the lock bits yet */
......@@ -919,6 +922,7 @@ btr_page_empty(
segment headers, next page-field, etc.) is preserved intact */
page_create(page, mtr);
buf_block_align(page)->check_index_page_at_flush = TRUE;
}
/*****************************************************************
......
......@@ -121,16 +121,19 @@ btr_cur_latch_leaves(
{
ulint left_page_no;
ulint right_page_no;
page_t* get_page;
ut_ad(tree && page && mtr);
if (latch_mode == BTR_SEARCH_LEAF) {
btr_page_get(space, page_no, RW_S_LATCH, mtr);
get_page = btr_page_get(space, page_no, RW_S_LATCH, mtr);
buf_block_align(get_page)->check_index_page_at_flush = TRUE;
} else if (latch_mode == BTR_MODIFY_LEAF) {
btr_page_get(space, page_no, RW_X_LATCH, mtr);
get_page = btr_page_get(space, page_no, RW_X_LATCH, mtr);
buf_block_align(get_page)->check_index_page_at_flush = TRUE;
} else if (latch_mode == BTR_MODIFY_TREE) {
......@@ -138,15 +141,22 @@ btr_cur_latch_leaves(
left_page_no = btr_page_get_prev(page, mtr);
if (left_page_no != FIL_NULL) {
btr_page_get(space, left_page_no, RW_X_LATCH, mtr);
get_page = btr_page_get(space, left_page_no,
RW_X_LATCH, mtr);
buf_block_align(get_page)->check_index_page_at_flush =
TRUE;
}
btr_page_get(space, page_no, RW_X_LATCH, mtr);
get_page = btr_page_get(space, page_no, RW_X_LATCH, mtr);
buf_block_align(get_page)->check_index_page_at_flush = TRUE;
right_page_no = btr_page_get_next(page, mtr);
if (right_page_no != FIL_NULL) {
btr_page_get(space, right_page_no, RW_X_LATCH, mtr);
get_page = btr_page_get(space, right_page_no,
RW_X_LATCH, mtr);
buf_block_align(get_page)->check_index_page_at_flush =
TRUE;
}
} else if (latch_mode == BTR_SEARCH_PREV) {
......@@ -157,9 +167,12 @@ btr_cur_latch_leaves(
if (left_page_no != FIL_NULL) {
cursor->left_page = btr_page_get(space, left_page_no,
RW_S_LATCH, mtr);
buf_block_align(
cursor->left_page)->check_index_page_at_flush = TRUE;
}
btr_page_get(space, page_no, RW_S_LATCH, mtr);
get_page = btr_page_get(space, page_no, RW_S_LATCH, mtr);
buf_block_align(get_page)->check_index_page_at_flush = TRUE;
} else if (latch_mode == BTR_MODIFY_PREV) {
......@@ -169,9 +182,12 @@ btr_cur_latch_leaves(
if (left_page_no != FIL_NULL) {
cursor->left_page = btr_page_get(space, left_page_no,
RW_X_LATCH, mtr);
buf_block_align(
cursor->left_page)->check_index_page_at_flush = TRUE;
}
btr_page_get(space, page_no, RW_X_LATCH, mtr);
get_page = btr_page_get(space, page_no, RW_X_LATCH, mtr);
buf_block_align(get_page)->check_index_page_at_flush = TRUE;
} else {
ut_error;
}
......@@ -274,6 +290,7 @@ btr_cur_search_to_nth_level(
if (btr_search_latch.writer == RW_LOCK_NOT_LOCKED
&& latch_mode <= BTR_MODIFY_LEAF && info->last_hash_succ
&& !estimate
&& mode != PAGE_CUR_LE_OR_EXTENDS
&& btr_search_guess_on_hash(index, info, tuple, mode,
latch_mode, cursor,
has_search_latch, mtr)) {
......@@ -334,12 +351,18 @@ btr_cur_search_to_nth_level(
rw_latch = RW_NO_LATCH;
buf_mode = BUF_GET;
/* We use these modified search modes on non-leaf levels of the
B-tree. These let us end up in the right B-tree leaf. In that leaf
we use the original search mode. */
if (mode == PAGE_CUR_GE) {
page_mode = PAGE_CUR_L;
} else if (mode == PAGE_CUR_G) {
page_mode = PAGE_CUR_LE;
} else if (mode == PAGE_CUR_LE) {
page_mode = PAGE_CUR_LE;
} else if (mode == PAGE_CUR_LE_OR_EXTENDS) {
page_mode = PAGE_CUR_LE_OR_EXTENDS;
} else {
ut_ad(mode == PAGE_CUR_L);
page_mode = PAGE_CUR_L;
......@@ -390,6 +413,8 @@ btr_cur_search_to_nth_level(
goto retry_page_get;
}
buf_block_align(page)->check_index_page_at_flush = TRUE;
#ifdef UNIV_SYNC_DEBUG
if (rw_latch != RW_NO_LATCH) {
......@@ -543,6 +568,8 @@ btr_cur_open_at_index_side(
ut_ad(0 == ut_dulint_cmp(tree->id,
btr_page_get_index_id(page)));
buf_block_align(page)->check_index_page_at_flush = TRUE;
if (height == ULINT_UNDEFINED) {
/* We are in the root node */
......
......@@ -354,6 +354,7 @@ btr_pcur_move_to_next_page(
ut_ad(next_page_no != FIL_NULL);
next_page = btr_page_get(space, next_page_no, cursor->latch_mode, mtr);
buf_block_align(next_page)->check_index_page_at_flush = TRUE;
btr_leaf_page_release(page, cursor->latch_mode, mtr);
......
......@@ -331,6 +331,11 @@ buf_page_print(
index->table_name,
index->name);
}
} else if (fil_page_get_type(read_buf) == FIL_PAGE_INODE) {
fprintf(stderr, "InnoDB: Page may be an 'inode' page\n");
} else if (fil_page_get_type(read_buf) == FIL_PAGE_IBUF_FREE_LIST) {
fprintf(stderr,
"InnoDB: Page may be an insert buffer free list page\n");
}
}
......@@ -351,6 +356,8 @@ buf_block_init(
block->file_page_was_freed = FALSE;
block->check_index_page_at_flush = FALSE;
rw_lock_create(&(block->lock));
ut_ad(rw_lock_validate(&(block->lock)));
......@@ -616,6 +623,29 @@ buf_page_peek_block(
return(block);
}
/************************************************************************
Clears the check_index_page_at_flush flag of the block that holds the given
page, provided the page is currently found in the buffer pool. Does nothing
if the page is not in the pool. */
void
buf_reset_check_index_page_at_flush(
/*================================*/
	ulint	space,	/* in: space id */
	ulint	offset)	/* in: page number */
{
	buf_block_t*	found;

	/* The page hash lookup and the flag write are both done while
	holding the buffer pool mutex */
	mutex_enter_fast(&(buf_pool->mutex));

	found = buf_page_hash_get(space, offset);

	if (found != NULL) {
		found->check_index_page_at_flush = FALSE;
	}

	mutex_exit(&(buf_pool->mutex));
}
/************************************************************************
Returns the current state of is_hashed of a page. FALSE if the page is
not in the pool. NOTE that this operation does not fix the page in the
......@@ -1185,6 +1215,8 @@ buf_page_init(
block->space = space;
block->offset = offset;
block->check_index_page_at_flush = FALSE;
block->lock_hash_val = lock_rec_hash(space, offset);
block->lock_mutex = NULL;
......
......@@ -15,6 +15,7 @@ Created 11/11/1995 Heikki Tuuri
#include "ut0byte.h"
#include "ut0lst.h"
#include "page0page.h"
#include "fil0fil.h"
#include "buf0buf.h"
#include "buf0lru.h"
......@@ -225,6 +226,24 @@ buf_flush_buffered_writes(void)
return;
}
for (i = 0; i < trx_doublewrite->first_free; i++) {
block = trx_doublewrite->buf_block_arr[i];
if (block->check_index_page_at_flush
&& !page_simple_validate(block->frame)) {
buf_page_print(block->frame);
ut_print_timestamp(stderr);
fprintf(stderr,
" InnoDB: Apparent corruption of an index page\n"
"InnoDB: to be written to data file. We intentionally crash server\n"
"InnoDB: to prevent corrupt data from ending up in data\n"
"InnoDB: files.\n");
ut_a(0);
}
}
if (trx_doublewrite->first_free > TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
len = TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE;
} else {
......
......@@ -29,7 +29,14 @@ Created 1/8/1996 Heikki Tuuri
dict_sys_t* dict_sys = NULL; /* the dictionary system */
rw_lock_t dict_foreign_key_check_lock;
rw_lock_t dict_operation_lock; /* table create, drop, etc. reserve
this in X-mode; implicit or background
operations (purge, rollback, foreign
key checks) reserve this in S-mode; we
cannot trust that MySQL protects
implicit or background operations
from dropping a table: this is our
mechanism */
#define DICT_HEAP_SIZE 100 /* initial memory heap size when
creating a table or index object */
......@@ -509,9 +516,8 @@ dict_init(void)
UT_LIST_INIT(dict_sys->table_LRU);
rw_lock_create(&dict_foreign_key_check_lock);
rw_lock_set_level(&dict_foreign_key_check_lock,
SYNC_FOREIGN_KEY_CHECK);
rw_lock_create(&dict_operation_lock);
rw_lock_set_level(&dict_operation_lock, SYNC_DICT_OPERATION);
}
/**************************************************************************
......@@ -1851,14 +1857,14 @@ dict_scan_to(
/*************************************************************************
Accepts a specified string. Comparisons are case-insensitive. */
static
char*
dict_accept(
/*========*/
/* out: if string was accepted, the pointer
is moved after that, else ptr is returned */
char* ptr, /* in: scan from this */
const char* string, /* in: accept only this string as the next
const char* string,/* in: accept only this string as the next
non-whitespace string */
ibool* success)/* out: TRUE if accepted */
{
......
......@@ -967,6 +967,7 @@ fil_extend_last_data_file(
fil_node_t* node;
fil_space_t* space;
fil_system_t* system = fil_system;
byte* buf2;
byte* buf;
ibool success;
ulint i;
......@@ -981,19 +982,23 @@ fil_extend_last_data_file(
fil_node_prepare_for_io(node, system, space);
buf = mem_alloc(1024 * 1024);
buf2 = mem_alloc(1024 * 1024 + UNIV_PAGE_SIZE);
buf = ut_align(buf2, UNIV_PAGE_SIZE);
memset(buf, '\0', 1024 * 1024);
for (i = 0; i < size_increase / ((1024 * 1024) / UNIV_PAGE_SIZE); i++) {
success = os_file_write(node->name, node->handle, buf,
/* If we use native Windows aio, then this write is also
done using it */
success = os_aio(OS_FILE_WRITE, OS_AIO_SYNC,
node->name, node->handle, buf,
(node->size << UNIV_PAGE_SIZE_SHIFT) & 0xFFFFFFFF,
node->size >> (32 - UNIV_PAGE_SIZE_SHIFT),
1024 * 1024);
1024 * 1024, NULL, NULL);
if (!success) {
break;
}
......@@ -1003,7 +1008,7 @@ fil_extend_last_data_file(
os_has_said_disk_full = FALSE;
}
mem_free(buf);
mem_free(buf2);
fil_node_complete_io(node, system, OS_FILE_WRITE);
......@@ -1528,7 +1533,6 @@ fil_page_set_type(
ulint type) /* in: type */
{
ut_ad(page);
ut_ad((type == FIL_PAGE_INDEX) || (type == FIL_PAGE_UNDO_LOG));
mach_write_to_2(page + FIL_PAGE_TYPE, type);
}
......
......@@ -769,6 +769,8 @@ fsp_init_file_page_low(
#endif
page = buf_frame_align(ptr);
buf_block_align(page)->check_index_page_at_flush = FALSE;
#ifdef UNIV_BASIC_LOG_DEBUG
/* printf("In log debug version: Erase the contents of the file page\n");
*/
......@@ -1097,7 +1099,7 @@ fsp_fill_free_list(
/* Initialize the ibuf page in a separate
mini-transaction because it is low in the latching
order, and we must be able to release the its latch
order, and we must be able to release its latch
before returning from the fsp routine */
mtr_start(&ibuf_mtr);
......@@ -1264,7 +1266,12 @@ fsp_alloc_free_page(
free = xdes_find_bit(descr, XDES_FREE_BIT, TRUE,
hint % FSP_EXTENT_SIZE, mtr);
ut_a(free != ULINT_UNDEFINED);
if (free == ULINT_UNDEFINED) {
ut_print_buf(((byte*)descr) - 500, 1000);
ut_a(0);
}
xdes_set_bit(descr, XDES_FREE_BIT, free, FALSE, mtr);
......@@ -1412,7 +1419,12 @@ fsp_free_extent(
descr = xdes_get_descriptor_with_space_hdr(header, space, page, mtr);
ut_a(xdes_get_state(descr, mtr) != XDES_FREE);
if (xdes_get_state(descr, mtr) == XDES_FREE) {
ut_print_buf(((byte*)descr) - 500, 1000);
ut_a(0);
}
xdes_init(descr, mtr);
......@@ -1523,6 +1535,10 @@ fsp_alloc_seg_inode_page(
page = buf_page_get(space, page_no, RW_X_LATCH, mtr);
buf_block_align(page)->check_index_page_at_flush = FALSE;
fil_page_set_type(page, FIL_PAGE_INODE);
buf_page_dbg_add_level(page, SYNC_FSP_PAGE);
for (i = 0; i < FSP_SEG_INODES_PER_PAGE; i++) {
......@@ -2298,6 +2314,8 @@ fseg_alloc_free_page_low(
fseg_mark_page_used(seg_inode, space, ret_page, mtr);
}
buf_reset_check_index_page_at_flush(space, ret_page);
return(ret_page);
}
......
......@@ -1295,6 +1295,8 @@ ibuf_add_free_page(
flst_add_last(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
page + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, &mtr);
fil_page_set_type(page, FIL_PAGE_IBUF_FREE_LIST);
ibuf_data->seg_size++;
ibuf_data->free_list_len++;
......@@ -1305,6 +1307,7 @@ ibuf_add_free_page(
ibuf_bitmap_page_set_bits(bitmap_page, page_no, IBUF_BITMAP_IBUF,
TRUE, &mtr);
mtr_commit(&mtr);
mutex_exit(&ibuf_mutex);
......
......@@ -274,6 +274,15 @@ buf_page_peek_block(
ulint space, /* in: space id */
ulint offset);/* in: page number */
/************************************************************************
Resets the check_index_page_at_flush field of a page if found in the buffer
pool. */
void
buf_reset_check_index_page_at_flush(
/*================================*/
ulint space, /* in: space id */
ulint offset);/* in: page number */
/************************************************************************
Sets file_page_was_freed TRUE if the page is found in the buffer pool.
This function should be called when we free a file page and want the
debug version to check that it is not accessed any more unless
......@@ -648,6 +657,14 @@ struct buf_block_struct{
then it can wait for this rw-lock */
buf_block_t* hash; /* node used in chaining to the page
hash table */
ibool check_index_page_at_flush;
/* TRUE if we know that this is
an index page, and want the database
to check its consistency before flush;
note that there may be pages in the
buffer pool which are index pages,
but this flag is not set because
we do not keep track of all pages */
/* 2. Page flushing fields */
UT_LIST_NODE_T(buf_block_t) flush_list;
......
......@@ -26,6 +26,18 @@ Created 1/8/1996 Heikki Tuuri
#include "ut0byte.h"
#include "trx0types.h"
/*************************************************************************
Accepts a specified string. Comparisons are case-insensitive. */
char*
dict_accept(
/*========*/
/* out: if string was accepted, the pointer
is moved after that, else ptr is returned */
char* ptr, /* in: scan from this */
const char* string,/* in: accept only this string as the next
non-whitespace string */
ibool* success);/* out: TRUE if accepted */
/************************************************************************
Decrements the count of open MySQL handles to a table. */
......@@ -798,7 +810,7 @@ dict_mutex_exit_for_mysql(void);
extern dict_sys_t* dict_sys; /* the dictionary system */
extern rw_lock_t dict_foreign_key_check_lock;
extern rw_lock_t dict_operation_lock;
/* Dictionary system struct */
struct dict_sys_struct{
......
......@@ -73,6 +73,8 @@ extern fil_addr_t fil_addr_null;
/* File page types */
#define FIL_PAGE_INDEX 17855
#define FIL_PAGE_UNDO_LOG 2
#define FIL_PAGE_INODE 3
#define FIL_PAGE_IBUF_FREE_LIST 4
/* Space types */
#define FIL_TABLESPACE 501
......
......@@ -292,16 +292,12 @@ lock_sec_rec_modify_check_and_lock(
dict_index_t* index, /* in: secondary index */
que_thr_t* thr); /* in: query thread */
/*************************************************************************
Checks if locks of other transactions prevent an immediate read, or passing
over by a read cursor, of a clustered index record. If they do, first tests
if the query thread should anyway be suspended for some reason; if not, then
puts the transaction and the query thread to the lock wait state and inserts a
waiting request for a record lock to the lock queue. Sets the requested mode
lock on the record. */
Like the counterpart for a clustered index below, but now we read a
secondary index record. */
ulint
lock_clust_rec_read_check_and_lock(
/*===============================*/
lock_sec_rec_read_check_and_lock(
/*=============================*/
/* out: DB_SUCCESS, DB_LOCK_WAIT,
DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
ulint flags, /* in: if BTR_NO_LOCKING_FLAG bit is set,
......@@ -309,18 +305,24 @@ lock_clust_rec_read_check_and_lock(
rec_t* rec, /* in: user record or page supremum record
which should be read or passed over by a read
cursor */
dict_index_t* index, /* in: clustered index */
dict_index_t* index, /* in: secondary index */
ulint mode, /* in: mode of the lock which the read cursor
should set on records: LOCK_S or LOCK_X; the
latter is possible in SELECT FOR UPDATE */
ulint gap_mode,/* in: LOCK_ORDINARY, LOCK_GAP, or
LOCK_REC_NOT_GAP */
que_thr_t* thr); /* in: query thread */
/*************************************************************************
Like the counterpart for a clustered index above, but now we read a
secondary index record. */
Checks if locks of other transactions prevent an immediate read, or passing
over by a read cursor, of a clustered index record. If they do, first tests
if the query thread should anyway be suspended for some reason; if not, then
puts the transaction and the query thread to the lock wait state and inserts a
waiting request for a record lock to the lock queue. Sets the requested mode
lock on the record. */
ulint
lock_sec_rec_read_check_and_lock(
/*=============================*/
lock_clust_rec_read_check_and_lock(
/*===============================*/
/* out: DB_SUCCESS, DB_LOCK_WAIT,
DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
ulint flags, /* in: if BTR_NO_LOCKING_FLAG bit is set,
......@@ -328,10 +330,12 @@ lock_sec_rec_read_check_and_lock(
rec_t* rec, /* in: user record or page supremum record
which should be read or passed over by a read
cursor */
dict_index_t* index, /* in: secondary index */
dict_index_t* index, /* in: clustered index */
ulint mode, /* in: mode of the lock which the read cursor
should set on records: LOCK_S or LOCK_X; the
latter is possible in SELECT FOR UPDATE */
ulint gap_mode,/* in: LOCK_ORDINARY, LOCK_GAP, or
LOCK_REC_NOT_GAP */
que_thr_t* thr); /* in: query thread */
/*************************************************************************
Checks that a record is seen in a consistent read. */
......@@ -509,6 +513,7 @@ lock_validate(void);
extern lock_sys_t* lock_sys;
/* Lock modes and types */
/* Basic modes */
#define LOCK_NONE 0 /* this flag is used elsewhere to note
consistent read */
#define LOCK_IS 2 /* intention shared */
......@@ -519,15 +524,20 @@ extern lock_sys_t* lock_sys;
in an exclusive mode */
#define LOCK_MODE_MASK 0xF /* mask used to extract mode from the
type_mode field in a lock */
/* Lock types */
#define LOCK_TABLE 16 /* these type values should be so high that */
#define LOCK_REC 32 /* they can be ORed to the lock mode */
#define LOCK_TYPE_MASK 0xF0 /* mask used to extract lock type from the
type_mode field in a lock */
/* Waiting lock flag */
#define LOCK_WAIT 256 /* this wait bit should be so high that
it can be ORed to the lock mode and type;
when this bit is set, it means that the
lock has not yet been granted, it is just
waiting for its turn in the wait queue */
/* Precise modes */
#define LOCK_ORDINARY 0 /* this flag denotes an ordinary next-key lock
in contrast to LOCK_GAP or LOCK_REC_NOT_GAP */
#define LOCK_GAP 512 /* this gap bit should be so high that
it can be ORed to the other flags;
when this bit is set, it means that the
......@@ -537,7 +547,15 @@ extern lock_sys_t* lock_sys;
the bit is set; locks of this type are created
when records are removed from the index chain
of records */
#define LOCK_INSERT_INTENTION 1024 /* this bit is set when we place a waiting
#define LOCK_REC_NOT_GAP 1024 /* this bit means that the lock is only on
the index record and does NOT block inserts
to the gap before the index record; this is
used in the case when we retrieve a record
with a unique key, and is also used in
locking plain SELECTs (not part of UPDATE
or DELETE) when the user has set the READ
COMMITTED isolation level */
#define LOCK_INSERT_INTENTION 2048 /* this bit is set when we place a waiting
gap type record lock request in order to let
an insert of an index record to wait until
there are no conflicting locks by other
......
......@@ -111,6 +111,7 @@ log. */
#define OS_WIN31 1
#define OS_WIN95 2
#define OS_WINNT 3
#define OS_WIN2000 4
extern ulint os_n_file_reads;
extern ulint os_n_file_writes;
......@@ -122,7 +123,7 @@ Gets the operating system version. Currently works only on Windows. */
ulint
os_get_os_version(void);
/*===================*/
/* out: OS_WIN95, OS_WIN31, OS_WINNT (2000 == NT) */
/* out: OS_WIN95, OS_WIN31, OS_WINNT, or OS_WIN2000 */
/********************************************************************
Creates the seek mutexes used in positioned reads and writes. */
......
......@@ -15,6 +15,15 @@ Created 9/30/1995 Heikki Tuuri
typedef void* os_process_t;
typedef unsigned long int os_process_id_t;
/********************************************************************
Converts the current process id to a number. It is not guaranteed that the
number is unique. In Linux returns the 'process number' of the current
thread. That number is the same as one sees in 'top', for example. In Linux
the thread id is not the same as one sees in 'top'. */
ulint
os_proc_get_number(void);
/*====================*/
/********************************************************************
Allocates non-cacheable memory. */
......
......@@ -16,11 +16,8 @@ Created 9/8/1995 Heikki Tuuri
this is also the size of the wait slot array for MySQL threads which
can wait inside InnoDB */
#ifdef __WIN__
/* Windows 95/98/ME seemed to have difficulties creating the all
the event semaphores for the wait array slots. If the computer had
<= 64 MB memory, InnoDB startup could take minutes or even crash.
That is why we set this to only 1000 in Windows. */
/* Create fewer event semaphores because Win 98/ME had difficulty creating
40000 event semaphores */
#define OS_THREAD_MAX_N 1000
#else
#define OS_THREAD_MAX_N 10000
......
......@@ -26,7 +26,12 @@ Created 10/4/1994 Heikki Tuuri
#define PAGE_CUR_GE 2
#define PAGE_CUR_L 3
#define PAGE_CUR_LE 4
#define PAGE_CUR_DBG 5
#define PAGE_CUR_LE_OR_EXTENDS 5 /* This is a search mode used in
"column LIKE 'abc%' ORDER BY column DESC";
we have to find strings which are <= 'abc' or
which extend it */
#define PAGE_CUR_DBG 6
extern ulint page_cur_short_succ;
......
......@@ -666,6 +666,16 @@ page_rec_validate(
/* out: TRUE if ok */
rec_t* rec); /* in: record on the page */
/*******************************************************************
This function checks the consistency of an index page when we do not
know the index. The check is written defensively, so it should never crash
even if the page is total garbage. */
ibool
page_simple_validate(
/*=================*/
/* out: TRUE if ok */
page_t* page); /* in: index page */
/*******************************************************************
This function checks the consistency of an index page. */
ibool
......
......@@ -45,6 +45,14 @@ read_view_close(
/*============*/
read_view_t* view); /* in: read view */
/*************************************************************************
Closes a consistent read view for MySQL. This function is called at an SQL
statement end if the trx isolation level is <= TRX_ISO_READ_COMMITTED. */
void
read_view_close_for_mysql(
/*======================*/
trx_t* trx); /* in: trx which has a read view */
/*************************************************************************
Checks if a read view sees the specified transaction. */
UNIV_INLINE
ibool
......
......@@ -148,12 +148,22 @@ data field in the record. */
byte*
rec_get_nth_field(
/*==============*/
/* out: pointer to the field, NULL if SQL null */
/* out: pointer to the field */
rec_t* rec, /* in: record */
ulint n, /* in: index of the field */
ulint* len); /* out: length of the field; UNIV_SQL_NULL
if SQL null */
/****************************************************************
Return field length or UNIV_SQL_NULL. */
UNIV_INLINE
ulint
rec_get_nth_field_len(
/*==================*/
/* out: length of the field; UNIV_SQL_NULL if SQL
null */
rec_t* rec, /* in: record */
ulint n); /* in: index of the field */
/****************************************************************
Gets the physical size of a field. Also an SQL null may have a field of
size > 0, if the data type is of a fixed size. */
UNIV_INLINE
......
......@@ -65,6 +65,24 @@ a field stored to another page: */
#define REC_2BYTE_EXTERN_MASK 0x4000
/****************************************************************
Returns the length of the nth field of a record, or UNIV_SQL_NULL if the
field is SQL null. */
UNIV_INLINE
ulint
rec_get_nth_field_len(
/*==================*/
			/* out: length of the field; UNIV_SQL_NULL if SQL
			null */
	rec_t*	rec,	/* in: record */
	ulint	n)	/* in: index of the field */
{
	ulint	field_len;

	/* Only the length is wanted here; the field pointer returned by
	rec_get_nth_field is deliberately discarded */
	(void) rec_get_nth_field(rec, n, &field_len);

	return(field_len);
}
/***************************************************************
Sets the value of the ith field SQL null bit. */
......
......@@ -57,8 +57,6 @@ extern ulint srv_flush_log_at_trx_commit;
extern byte srv_latin1_ordering[256];/* The sort order table of the latin1
character set */
extern ibool srv_use_native_aio;
extern ulint srv_pool_size;
extern ulint srv_mem_pool_size;
extern ulint srv_lock_table_size;
......@@ -70,8 +68,9 @@ extern dulint srv_archive_recovery_limit_lsn;
extern ulint srv_lock_wait_timeout;
extern char* srv_unix_file_flush_method_str;
extern char* srv_file_flush_method_str;
extern ulint srv_unix_file_flush_method;
extern ulint srv_win_file_flush_method;
extern ulint srv_force_recovery;
extern ulint srv_thread_concurrency;
......@@ -154,13 +153,19 @@ typedef struct srv_sys_struct srv_sys_t;
/* The server system */
extern srv_sys_t* srv_sys;
/* Alternatives for the field flush option in Unix; see the InnoDB manual about
/* Alternatives for the file flush option in Unix; see the InnoDB manual about
what these mean */
#define SRV_UNIX_FDATASYNC 1
#define SRV_UNIX_FDATASYNC 1 /* This is the default; it is currently mapped
to a call of fsync() because fdatasync()
seemed to corrupt files in Linux and Solaris */
#define SRV_UNIX_O_DSYNC 2
#define SRV_UNIX_LITTLESYNC 3
#define SRV_UNIX_NOSYNC 4
/* Alternatives for file i/o in Windows */
#define SRV_WIN_IO_NORMAL 1
#define SRV_WIN_IO_UNBUFFERED 2 /* This is the default */
/* Alternatives for srv_force_recovery. Non-zero values are intended
to help the user get a damaged database up so that he can dump intact
tables and rows with SELECT INTO OUTFILE. The database must not otherwise
......@@ -311,15 +316,17 @@ srv_conc_exit_innodb(
trx_t* trx); /* in: transaction object associated with the
thread */
/*******************************************************************
Puts a MySQL OS thread to wait for a lock to be released. */
Puts a MySQL OS thread to wait for a lock to be released. If an error
occurs during the wait, trx->error_state associated with thr is
!= DB_SUCCESS when we return. DB_LOCK_WAIT_TIMEOUT and DB_DEADLOCK
are possible errors. DB_DEADLOCK is set if selective deadlock
resolution chose this transaction as a victim. */
ibool
void
srv_suspend_mysql_thread(
/*=====================*/
/* out: TRUE if the lock wait timeout was
exceeded */
que_thr_t* thr); /* in: query thread associated with
the MySQL OS thread */
que_thr_t* thr); /* in: query thread associated with the MySQL
OS thread */
/************************************************************************
Releases a MySQL OS thread waiting for a lock to be released, if the
thread is already suspended. */
......@@ -407,3 +414,4 @@ struct srv_sys_struct{
extern ulint srv_n_threads_active[];
#endif
......@@ -335,7 +335,8 @@ ibool
rw_lock_own(
/*========*/
rw_lock_t* lock, /* in: rw-lock */
ulint lock_type); /* in: lock type */
ulint lock_type); /* in: lock type: RW_LOCK_SHARED,
RW_LOCK_EX */
/**********************************************************************
Checks if somebody has locked the rw-lock in the specified mode. */
......
......@@ -371,10 +371,12 @@ or row lock! */
#define SYNC_NO_ORDER_CHECK 3000 /* this can be used to suppress
latching order checking */
#define SYNC_LEVEL_NONE 2000 /* default: level not defined */
#define SYNC_FOREIGN_KEY_CHECK 1001
#define SYNC_DICT_OPERATION 1001 /* table create, drop, etc. reserve
this in X-mode; implicit or background
operations (purge, rollback, foreign
key checks) reserve this in S-mode */
#define SYNC_DICT 1000
#define SYNC_DICT_AUTOINC_MUTEX 999
#define SYNC_PURGE_IS_RUNNING 997
#define SYNC_DICT_HEADER 995
#define SYNC_IBUF_HEADER 914
#define SYNC_IBUF_PESS_INSERT_MUTEX 912
......
......@@ -111,9 +111,6 @@ struct trx_purge_struct{
of the trx system and it never ends */
que_t* query; /* The query graph which will do the
parallelized purge operation */
rw_lock_t purge_is_running;/* Purge operation set an x-latch here
while it is accessing a table: this
prevents dropping of the table */
rw_lock_t latch; /* The latch protecting the purge view.
A purge operation must acquire an
x-latch here for the instant at which
......
......@@ -327,6 +327,7 @@ struct trx_struct{
time_t start_time; /* time the trx object was created
or the state last time became
TRX_ACTIVE */
ulint isolation_level;/* TRX_ISO_REPEATABLE_READ, ... */
ibool check_foreigns; /* normally TRUE, but if the user
wants to suppress foreign key checks,
(in table imports, for example) we
......@@ -350,6 +351,9 @@ struct trx_struct{
/*------------------------------*/
void* mysql_thd; /* MySQL thread handle corresponding
to this trx, or NULL */
char** mysql_query_str;/* pointer to the field in mysqld_thd
which contains the pointer to the
current SQL query string */
char* mysql_log_file_name;
/* if MySQL binlog is used, this field
contains a pointer to the latest file
......@@ -371,6 +375,9 @@ struct trx_struct{
replication has processed */
os_thread_id_t mysql_thread_id;/* id of the MySQL thread associated
with this transaction object */
ulint mysql_process_no;/* since in Linux, 'top' reports
process id's and not thread id's, we
store the process number too */
/*------------------------------*/
ulint n_mysql_tables_in_use; /* number of Innobase tables
used in the processing of the current
......@@ -379,9 +386,9 @@ struct trx_struct{
/* how many tables the current SQL
statement uses, except those
in consistent read */
ibool has_dict_foreign_key_check_lock;
ibool has_dict_operation_lock;
/* TRUE if the trx currently holds
an s-lock on dict_foreign_... */
an s-lock on dict_operation_lock */
ibool has_search_latch;
/* TRUE if this trx has latched the
search system latch in S-mode */
......@@ -523,6 +530,41 @@ struct trx_struct{
#define TRX_QUE_ROLLING_BACK 3 /* transaction is rolling back */
#define TRX_QUE_COMMITTING 4 /* transaction is committing */
/* Transaction isolation levels */
#define TRX_ISO_READ_UNCOMMITTED 1 /* dirty read: non-locking
SELECTs are performed so that
we do not look at a possible
earlier version of a record;
thus they are not 'consistent'
reads under this isolation
level; otherwise like level
2 */
#define TRX_ISO_READ_COMMITTED 2 /* somewhat Oracle-like
isolation, except that in
range UPDATE and DELETE we
must block phantom rows
with next-key locks;
SELECT ... FOR UPDATE and ...
LOCK IN SHARE MODE only lock
the index records, NOT the
gaps before them, and thus
allow free inserting;
each consistent read reads its
own snapshot */
#define TRX_ISO_REPEATABLE_READ 3 /* this is the default;
all consistent reads in the
same trx read the same
snapshot;
full next-key locking used
in locking reads to block
insertions into gaps */
#define TRX_ISO_SERIALIZABLE 4 /* all plain SELECTs are
converted to LOCK IN SHARE
MODE reads */
/* Types of a trx signal */
#define TRX_SIG_NO_SIGNAL 100
#define TRX_SIG_TOTAL_ROLLBACK 1
......
This diff is collapsed.
......@@ -347,9 +347,19 @@ mem_hash_remove(
NULL, NULL);
if (error) {
printf("Inconsistency in memory heap or buffer n:o %lu created\n",
node->nth_heap);
node->nth_heap);
printf("in %s line %lu and tried to free in %s line %lu.\n",
node->file_name, node->line, file_name, line);
printf(
"Hex dump of 400 bytes around memory heap first block start:\n");
ut_print_buf((byte*)(node->heap) - 200, 400);
printf("\nDump of the mem heap:\n");
mem_heap_validate_or_print(node->heap, NULL, TRUE, &error, &size,
NULL, NULL);
ut_error;
}
......
......@@ -148,7 +148,7 @@ Gets the operating system version. Currently works only on Windows. */
ulint
os_get_os_version(void)
/*===================*/
/* out: OS_WIN95, OS_WIN31, OS_WINNT (2000 == NT) */
/* out: OS_WIN95, OS_WIN31, OS_WINNT, OS_WIN2000 */
{
#ifdef __WIN__
OSVERSIONINFO os_info;
......@@ -162,7 +162,11 @@ os_get_os_version(void)
} else if (os_info.dwPlatformId == VER_PLATFORM_WIN32_WINDOWS) {
return(OS_WIN95);
} else if (os_info.dwPlatformId == VER_PLATFORM_WIN32_NT) {
return(OS_WINNT);
if (os_info.dwMajorVersion <= 4) {
return(OS_WINNT);
} else {
return(OS_WIN2000);
}
} else {
ut_error;
return(0);
......@@ -268,9 +272,7 @@ os_file_get_last_error(void)
}
/********************************************************************
Does error handling when a file operation fails. If we have run out
of disk space, then the user can clean the disk. If we do not find
a specified file, then the user can copy it to disk. */
Does error handling when a file operation fails. */
static
ibool
os_file_handle_error(
......@@ -503,7 +505,11 @@ os_file_create(
value 2 denotes that we do not flush the log at every
commit, but only once per second */
} else {
attributes = attributes | FILE_FLAG_NO_BUFFERING;
if (srv_win_file_flush_method ==
SRV_WIN_IO_UNBUFFERED) {
attributes = attributes
| FILE_FLAG_NO_BUFFERING;
}
}
#endif
} else if (purpose == OS_FILE_NORMAL) {
......@@ -514,7 +520,11 @@ os_file_create(
value 2 denotes that we do not flush the log at every
commit, but only once per second */
} else {
attributes = attributes | FILE_FLAG_NO_BUFFERING;
if (srv_win_file_flush_method ==
SRV_WIN_IO_UNBUFFERED) {
attributes = attributes
| FILE_FLAG_NO_BUFFERING;
}
}
#endif
} else {
......@@ -1752,6 +1762,7 @@ os_aio(
os_aio_array_t* array;
os_aio_slot_t* slot;
#ifdef WIN_ASYNC_IO
ibool retval;
BOOL ret = TRUE;
DWORD len = n;
void* dummy_mess1;
......@@ -1824,6 +1835,8 @@ os_aio(
if (os_aio_use_native_aio) {
#ifdef WIN_ASYNC_IO
os_n_file_reads++;
os_bytes_read_since_printout += len;
ret = ReadFile(file, buf, (DWORD)n, &len,
&(slot->control));
#elif defined(POSIX_ASYNC_IO)
......@@ -1870,10 +1883,12 @@ os_aio(
where we also use async i/o: in Windows we must
use the same wait mechanism as for async i/o */
return(os_aio_windows_handle(ULINT_UNDEFINED,
retval = os_aio_windows_handle(ULINT_UNDEFINED,
slot->pos,
&dummy_mess1, &dummy_mess2,
&dummy_type));
&dummy_type);
return(retval);
}
return(TRUE);
......@@ -1897,8 +1912,6 @@ os_aio(
goto try_again;
}
ut_error;
return(FALSE);
}
......@@ -1958,14 +1971,14 @@ os_aio_windows_handle(
n = array->n_slots / array->n_segments;
if (array == os_aio_sync_array) {
srv_io_thread_op_info[orig_seg] = "wait windows aio for 1 page";
srv_io_thread_op_info[orig_seg] = "wait Windows aio for 1 page";
ut_ad(pos < array->n_slots);
os_event_wait(array->events[pos]);
i = pos;
} else {
srv_io_thread_op_info[orig_seg] =
"wait windows aio for n pages";
"wait Windows aio";
i = os_event_wait_multiple(n, (array->events) + segment * n);
}
......@@ -1991,10 +2004,8 @@ os_aio_windows_handle(
ut_a(TRUE == os_file_flush(slot->file));
}
} else {
os_file_get_last_error();
ut_error;
os_file_handle_error(slot->file, slot->name);
ret_val = FALSE;
}
......
......@@ -18,6 +18,23 @@ Created 9/30/1995 Heikki Tuuri
#include "ut0mem.h"
/********************************************************************
Converts the current process id to a number. It is not guaranteed that the
number is unique. In Linux returns the 'process number' of the current
thread. That number is the same as one sees in 'top', for example. In Linux
the thread id is not the same as one sees in 'top'. */
ulint
os_proc_get_number(void)
/*====================*/
{
#ifdef __WIN__
return((ulint)GetCurrentProcessId());
#else
return((ulint)getpid());
#endif
}
/********************************************************************
Allocates non-cacheable memory. */
......
......@@ -169,7 +169,7 @@ page_cur_search_with_match(
ut_ad(dtuple_check_typed(tuple));
ut_ad((mode == PAGE_CUR_L) || (mode == PAGE_CUR_LE)
|| (mode == PAGE_CUR_G) || (mode == PAGE_CUR_GE)
|| (mode == PAGE_CUR_DBG));
|| (mode == PAGE_CUR_LE_OR_EXTENDS) || (mode == PAGE_CUR_DBG));
#ifdef PAGE_CUR_ADAPT
if ((page_header_get_field(page, PAGE_LEVEL) == 0)
......@@ -232,9 +232,26 @@ page_cur_search_with_match(
low_matched_bytes = cur_matched_bytes;
} else if (cmp == -1) {
up = mid;
up_matched_fields = cur_matched_fields;
up_matched_bytes = cur_matched_bytes;
if (mode == PAGE_CUR_LE_OR_EXTENDS
&& dfield_get_len(dtuple_get_nth_field(tuple,
cur_matched_fields))
== cur_matched_bytes
&& rec_get_nth_field_len(mid_rec,
cur_matched_fields)
!= UNIV_SQL_NULL) {
/* This means current dfield is not SQL
NULL, and the current rec field extends it */
low = mid;
low_matched_fields = cur_matched_fields;
low_matched_bytes = cur_matched_bytes;
} else {
up = mid;
up_matched_fields = cur_matched_fields;
up_matched_bytes = cur_matched_bytes;
}
} else if ((mode == PAGE_CUR_G) || (mode == PAGE_CUR_LE)) {
low = mid;
......@@ -252,8 +269,8 @@ page_cur_search_with_match(
slot = page_dir_get_nth_slot(page, up);
up_rec = page_dir_slot_get_rec(slot);
/* Perform linear search until the upper and lower records
come to distance 1 of each other. */
/* Perform linear search until the upper and lower records come to
distance 1 of each other. */
while (page_rec_get_next(low_rec) != up_rec) {
......@@ -272,10 +289,25 @@ page_cur_search_with_match(
low_matched_bytes = cur_matched_bytes;
} else if (cmp == -1) {
up_rec = mid_rec;
up_matched_fields = cur_matched_fields;
up_matched_bytes = cur_matched_bytes;
if (mode == PAGE_CUR_LE_OR_EXTENDS
    && dfield_get_len(dtuple_get_nth_field(tuple,
					cur_matched_fields))
       == cur_matched_bytes
    && rec_get_nth_field_len(mid_rec,
				cur_matched_fields)
       != UNIV_SQL_NULL) {

	/* This means current dfield is not SQL
	NULL, and the current rec field extends it */

	/* We are in the linear search phase, which is
	driven by the record pointers low_rec and up_rec:
	we must advance low_rec here. Assigning to the
	binary search slot numbers (low = mid) would leave
	both bounds of the enclosing while loop unchanged
	and the loop would never terminate. */

	low_rec = mid_rec;
	low_matched_fields = cur_matched_fields;
	low_matched_bytes = cur_matched_bytes;
} else {
	up_rec = mid_rec;
	up_matched_fields = cur_matched_fields;
	up_matched_bytes = cur_matched_bytes;
}
} else if ((mode == PAGE_CUR_G) || (mode == PAGE_CUR_LE)) {
low_rec = mid_rec;
low_matched_fields = cur_matched_fields;
......
......@@ -1312,6 +1312,194 @@ page_rec_validate(
return(TRUE);
}
/*******************************************************************
This function checks the consistency of an index page when we do not
know the index. This is also resilient so that this should never crash
even if the page is total garbage. The checks are, in this order:
(1) the number of directory slots is sensible and the record heap does
not overlap the page directory;
(2) the record list from the infimum to the supremum is consistent with
the page directory: the owned counts, the slot pointers, the next record
offsets, and the number of slots and records stored in the page header;
(3) the free list stays inside the page and below the heap top, and the
total number of records agrees with PAGE_N_HEAP.
A diagnostic message is printed to stderr for the first failed check. */

ibool
page_simple_validate(
/*=================*/
			/* out: TRUE if ok */
	page_t*	page)	/* in: index page */
{
	page_cur_t	cur;
	page_dir_slot_t* slot;
	ulint		slot_no;
	ulint		n_slots;
	rec_t*		rec;
	byte*		rec_heap_top;
	ulint		count;
	ulint		own_count;
	ibool		ret	= FALSE;

	/* Check first that the record heap and the directory do not
	overlap. */

	n_slots = page_dir_get_n_slots(page);

	/* A zero slot count must be rejected here: below we use
	n_slots - 1 as a slot number, and that would wrap around */

	if (n_slots == 0 || n_slots > UNIV_PAGE_SIZE / 4) {
		fprintf(stderr,
		"Nonsensical number %lu of page dir slots\n", n_slots);

		goto func_exit;
	}

	rec_heap_top = page_header_get_ptr(page, PAGE_HEAP_TOP);

	if (rec_heap_top > page_dir_get_nth_slot(page, n_slots - 1)) {

		fprintf(stderr,
	"Record heap and dir overlap on a page, heap top %lu, dir %lu\n",
		(ulint)(page_header_get_ptr(page, PAGE_HEAP_TOP) - page),
		(ulint)(page_dir_get_nth_slot(page, n_slots - 1) - page));

		goto func_exit;
	}

	/* Validate the record list in a loop checking also that it is
	consistent with the page record directory. */

	count = 0;
	own_count = 1;
	slot_no = 0;
	slot = page_dir_get_nth_slot(page, slot_no);

	page_cur_set_before_first(page, &cur);

	for (;;) {
		rec = (&cur)->rec;

		if (rec > rec_heap_top) {
			fprintf(stderr,
			"Record %lu is above rec heap top %lu\n",
			(ulint)(rec - page), (ulint)(rec_heap_top - page));

			goto func_exit;
		}

		if (rec_get_n_owned(rec) != 0) {
			/* This is a record pointed to by a dir slot */
			if (rec_get_n_owned(rec) != own_count) {
				fprintf(stderr,
				"Wrong owned count %lu, %lu, rec %lu\n",
				rec_get_n_owned(rec), own_count,
				(ulint)(rec - page));

				goto func_exit;
			}

			if (page_dir_slot_get_rec(slot) != rec) {
				fprintf(stderr,
			"Dir slot does not point to right rec %lu\n",
				(ulint)(rec - page));

				goto func_exit;
			}

			own_count = 0;

			if (!page_cur_is_after_last(&cur)) {
				slot_no++;

				/* On a corrupt page there may be more
				slot owner records than directory slots:
				do not index the directory past its end.
				The page would in any case fail the
				'n slots wrong' check below. */

				if (slot_no >= n_slots) {
					fprintf(stderr,
			"Page dir slot number %lu is not below n slots %lu\n",
					slot_no, n_slots);

					goto func_exit;
				}

				slot = page_dir_get_nth_slot(page, slot_no);
			}
		}

		if (page_cur_is_after_last(&cur)) {

			break;
		}

		/* Check the next record offset before the cursor follows
		it */

		if (rec_get_next_offs(rec) < FIL_PAGE_DATA
		    || rec_get_next_offs(rec) >= UNIV_PAGE_SIZE) {
			fprintf(stderr,
			"Next record offset nonsensical %lu for rec %lu\n",
			rec_get_next_offs(rec),
			(ulint)(rec - page));

			goto func_exit;
		}

		count++;

		/* The bound on count guarantees that the loop ends even
		if the record list is circular */

		if (count > UNIV_PAGE_SIZE) {
			fprintf(stderr,
			"Page record list appears to be circular %lu\n",
			count);

			goto func_exit;
		}

		page_cur_move_to_next(&cur);
		own_count++;
	}

	/* Here rec is the last record of the list, the supremum */

	if (rec_get_n_owned(rec) == 0) {
		fprintf(stderr, "n owned is zero in a supremum rec\n");

		goto func_exit;
	}

	if (slot_no != n_slots - 1) {
		fprintf(stderr, "n slots wrong %lu, %lu\n",
			slot_no, n_slots - 1);

		goto func_exit;
	}

	if (page_header_get_field(page, PAGE_N_RECS) + 2 != count + 1) {
		fprintf(stderr, "n recs wrong %lu %lu\n",
		page_header_get_field(page, PAGE_N_RECS) + 2, count + 1);

		goto func_exit;
	}

	/* Check then the free list */
	rec = page_header_get_ptr(page, PAGE_FREE);

	while (rec != NULL) {
		if (rec < page + FIL_PAGE_DATA
		    || rec >= page + UNIV_PAGE_SIZE) {
			fprintf(stderr,
			"Free list record has a nonsensical offset %lu\n",
			(ulint)(rec - page));

			goto func_exit;
		}

		if (rec > rec_heap_top) {
			fprintf(stderr,
			"Free list record %lu is above rec heap top %lu\n",
			(ulint)(rec - page), (ulint)(rec_heap_top - page));

			goto func_exit;
		}

		/* count keeps accumulating from the record list, so that
		it can be compared to PAGE_N_HEAP below */

		count++;

		if (count > UNIV_PAGE_SIZE) {
			fprintf(stderr,
			"Page free list appears to be circular %lu\n",
			count);

			goto func_exit;
		}

		rec = page_rec_get_next(rec);
	}

	if (page_header_get_field(page, PAGE_N_HEAP) != count + 1) {

		fprintf(stderr, "N heap is wrong %lu, %lu\n",
		page_header_get_field(page, PAGE_N_HEAP), count + 1);

		goto func_exit;
	}

	ret = TRUE;

func_exit:
	return(ret);
}
/*******************************************************************
This function checks the consistency of an index page. */
......@@ -1339,6 +1527,14 @@ page_validate(
ulint i;
char err_buf[1000];
if (!page_simple_validate(page)) {
buf_page_print(page);
fprintf(stderr, "Apparent corruption in a page in index %s\n",
index->name);
return(FALSE);
}
heap = mem_heap_create(UNIV_PAGE_SIZE);
/* The following buffer is used to check that the
......
......@@ -4,8 +4,6 @@
* $Header: /home/daffy/u0/vern/flex/RCS/flex.skl,v 2.91 96/09/10 16:58:48 vern Exp $
*/
#include "univ.i"
#define FLEX_SCANNER
#define YY_FLEX_MAJOR_VERSION 2
#define YY_FLEX_MINOR_VERSION 5
......@@ -609,18 +607,13 @@ How to make the InnoDB parser and lexer C files:
6. Remove the #include of unistd.h from about line 2500 of lexyy.c
7. Move #include <math.h> in pars0grm.c after #include "univ.i" to remove
a large file compilation error on AIX.
8. Move #include "univ.i" in lexyy.c to the file start to remove a large
file compilation error on AIX.
These instructions seem to work at least with bison-1.28 and flex-2.5.4 on
Linux.
*******************************************************/
#line 36 "pars0lex.l"
#define YYSTYPE que_node_t*
#include "univ.i"
#include "pars0pars.h"
#include "pars0grm.h"
#include "pars0sym.h"
......
......@@ -102,8 +102,6 @@ que_node_t */
#include "que0que.h"
#include "row0sel.h"
#include <math.h>
#define YYSTYPE que_node_t*
/* #define __STDC__ */
......
......@@ -200,6 +200,28 @@ read_view_close(
UT_LIST_REMOVE(view_list, trx_sys->view_list, view);
}
/*************************************************************************
Closes a consistent read view for MySQL. This function is called at an SQL
statement end if the trx isolation level is <= TRX_ISO_READ_COMMITTED.
The trx must have a read view when this is called: that is asserted.
The function reserves and releases the kernel mutex itself. */
void
read_view_close_for_mysql(
/*======================*/
trx_t* trx) /* in: trx which has a read view */
{
/* It is an error to call this for a trx without a read view */
ut_a(trx->read_view);
/* The view is closed and the pointer in trx is reset while holding
the kernel mutex */
mutex_enter(&kernel_mutex);
read_view_close(trx->read_view);
/* NOTE(review): presumably the view was allocated from
trx->read_view_heap and emptying the heap releases that memory;
confirm against the code which opens the view */
mem_heap_empty(trx->read_view_heap);
trx->read_view = NULL;
mutex_exit(&kernel_mutex);
}
/*************************************************************************
Prints a read view to stderr. */
......
......@@ -321,59 +321,6 @@ row_ins_clust_index_entry_by_modify(
return(err);
}
/*******************************************************************
Tells whether inserting the index entry would cause a unique key
violation with the given record. A violation is reported when all the
unique fields of the index match between entry and rec, rec is not
delete-marked, and, in the case of a secondary index, none of the unique
fields of entry is SQL NULL. */
static
ibool
row_ins_dupl_error_with_rec(
/*========================*/
				/* out: TRUE if error */
	rec_t*		rec,	/* in: user record; NOTE that we assume
				that the caller already has a record lock on
				the record! */
	dtuple_t*	entry,	/* in: entry to insert */
	dict_index_t*	index)	/* in: index */
{
	ulint	n_matched_fields	= 0;
	ulint	n_matched_bytes		= 0;
	ulint	n_uniq;
	ulint	field_no;

	n_uniq = dict_index_get_n_unique(index);

	cmp_dtuple_rec_with_match(entry, rec, &n_matched_fields,
						&n_matched_bytes);

	if (n_matched_fields < n_uniq) {
		/* The unique part of the key differs: no conflict */

		return(FALSE);
	}

	/* In a unique secondary index we allow equal key values if they
	contain SQL NULLs */

	if (!(index->type & DICT_CLUSTERED)) {

		field_no = 0;

		while (field_no < n_uniq) {
			if (dfield_get_len(dtuple_get_nth_field(entry,
								field_no))
			    == UNIV_SQL_NULL) {

				return(FALSE);
			}

			field_no++;
		}
	}

	/* A delete-marked record does not count as a duplicate */

	return(rec_get_deleted_flag(rec) ? FALSE : TRUE);
}
/*************************************************************************
Either deletes or sets the referencing columns SQL NULL in a child row.
Used in ON DELETE ... clause for foreign keys when a parent row is
......@@ -533,8 +480,12 @@ row_ins_foreign_delete_or_set_null(
err = lock_table(0, table, LOCK_IX, thr);
if (err == DB_SUCCESS) {
/* Here it suffices to use a LOCK_REC_NOT_GAP type lock;
we already have a normal shared lock on the appropriate
gap if the search criterion was not unique */
err = lock_clust_rec_read_check_and_lock(0, clust_rec,
clust_index, LOCK_X, thr);
clust_index, LOCK_X, LOCK_REC_NOT_GAP, thr);
}
if (err != DB_SUCCESS) {
......@@ -630,12 +581,14 @@ row_ins_foreign_delete_or_set_null(
/*************************************************************************
Sets a shared lock on a record. Used in locking possible duplicate key
records. */
records and also in checking foreign key constraints. */
static
ulint
row_ins_set_shared_rec_lock(
/*========================*/
/* out: DB_SUCCESS or error code */
ulint type, /* in: LOCK_ORDINARY, LOCK_GAP, or
LOCK_REC_NOT_GAP type lock */
rec_t* rec, /* in: record */
dict_index_t* index, /* in: index */
que_thr_t* thr) /* in: query thread */
......@@ -644,10 +597,10 @@ row_ins_set_shared_rec_lock(
if (index->type & DICT_CLUSTERED) {
err = lock_clust_rec_read_check_and_lock(0, rec, index, LOCK_S,
thr);
type, thr);
} else {
err = lock_sec_rec_read_check_and_lock(0, rec, index, LOCK_S,
thr);
type, thr);
}
return(err);
......@@ -656,7 +609,7 @@ row_ins_set_shared_rec_lock(
/*******************************************************************
Checks if foreign key constraint fails for an index entry. Sets shared locks
which lock either the success or the failure of the constraint. NOTE that
the caller must have a shared latch on dict_foreign_key_check_lock. */
the caller must have a shared latch on dict_operation_lock. */
ulint
row_ins_check_foreign_constraint(
......@@ -679,7 +632,7 @@ row_ins_check_foreign_constraint(
dict_table_t* check_table;
dict_index_t* check_index;
ulint n_fields_cmp;
ibool timeout_expired;
ibool unique_search;
rec_t* rec;
btr_pcur_t pcur;
ibool moved;
......@@ -689,7 +642,9 @@ row_ins_check_foreign_constraint(
mtr_t mtr;
run_again:
ut_ad(rw_lock_own(&dict_foreign_key_check_lock, RW_LOCK_SHARED));
ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_SHARED));
err = DB_SUCCESS;
if (thr_get_trx(thr)->check_foreigns == FALSE) {
/* The user has suppressed foreign key checks currently for
......@@ -748,6 +703,14 @@ row_ins_check_foreign_constraint(
dtuple_set_n_fields_cmp(entry, foreign->n_fields);
if (dict_index_get_n_unique(check_index) <= foreign->n_fields) {
/* We can just set a LOCK_REC_NOT_GAP type lock */
unique_search = TRUE;
} else {
unique_search = FALSE;
}
btr_pcur_open(check_index, entry, PAGE_CUR_GE,
BTR_SEARCH_LEAF, &pcur, &mtr);
......@@ -761,25 +724,45 @@ row_ins_check_foreign_constraint(
goto next_rec;
}
/* Try to place a lock on the index record */
err = row_ins_set_shared_rec_lock(rec, check_index, thr);
if (err != DB_SUCCESS) {
break;
}
if (rec == page_get_supremum_rec(buf_frame_align(rec))) {
err = row_ins_set_shared_rec_lock(LOCK_ORDINARY, rec,
check_index, thr);
if (err != DB_SUCCESS) {
break;
}
goto next_rec;
}
cmp = cmp_dtuple_rec(entry, rec);
if (cmp == 0) {
if (!rec_get_deleted_flag(rec)) {
if (rec_get_deleted_flag(rec)) {
err = row_ins_set_shared_rec_lock(LOCK_ORDINARY,
rec, check_index, thr);
if (err != DB_SUCCESS) {
break;
}
} else {
/* Found a matching record */
if (unique_search) {
err = row_ins_set_shared_rec_lock(
LOCK_REC_NOT_GAP,
rec, check_index, thr);
} else {
err = row_ins_set_shared_rec_lock(
LOCK_ORDINARY,
rec, check_index, thr);
}
if (err != DB_SUCCESS) {
break;
}
/* printf(
"FOREIGN: Found matching record from %s %s\n",
......@@ -807,6 +790,13 @@ row_ins_check_foreign_constraint(
}
if (cmp < 0) {
err = row_ins_set_shared_rec_lock(LOCK_GAP,
rec, check_index, thr);
if (err != DB_SUCCESS) {
break;
}
if (check_ref) {
err = DB_NO_REFERENCED_ROW;
} else {
......@@ -844,14 +834,14 @@ row_ins_check_foreign_constraint(
que_thr_stop_for_mysql(thr);
timeout_expired = srv_suspend_mysql_thread(thr);
srv_suspend_mysql_thread(thr);
if (!timeout_expired) {
if (thr_get_trx(thr)->error_state == DB_SUCCESS) {
goto run_again;
}
err = DB_LOCK_WAIT_TIMEOUT;
err = thr_get_trx(thr)->error_state;
}
return(err);
......@@ -890,21 +880,21 @@ row_ins_check_foreign_constraints(
trx);
}
if (!trx->has_dict_foreign_key_check_lock) {
if (!trx->has_dict_operation_lock) {
got_s_lock = TRUE;
rw_lock_s_lock(&dict_foreign_key_check_lock);
rw_lock_s_lock(&dict_operation_lock);
trx->has_dict_foreign_key_check_lock = TRUE;
trx->has_dict_operation_lock = TRUE;
}
err = row_ins_check_foreign_constraint(TRUE, foreign,
table, index, entry, thr);
if (got_s_lock) {
rw_lock_s_unlock(&dict_foreign_key_check_lock);
rw_lock_s_unlock(&dict_operation_lock);
trx->has_dict_foreign_key_check_lock = FALSE;
trx->has_dict_operation_lock = FALSE;
}
if (err != DB_SUCCESS) {
......@@ -918,6 +908,59 @@ row_ins_check_foreign_constraints(
return(DB_SUCCESS);
}
/*******************************************************************
Tells whether inserting the index entry would cause a unique key
violation with the given record. A violation is reported when all the
unique fields of the index match between entry and rec, rec is not
delete-marked, and, in the case of a secondary index, none of the unique
fields of entry is SQL NULL. */
static
ibool
row_ins_dupl_error_with_rec(
/*========================*/
				/* out: TRUE if error */
	rec_t*		rec,	/* in: user record; NOTE that we assume
				that the caller already has a record lock on
				the record! */
	dtuple_t*	entry,	/* in: entry to insert */
	dict_index_t*	index)	/* in: index */
{
	ulint	n_matched_fields	= 0;
	ulint	n_matched_bytes		= 0;
	ulint	n_uniq;
	ulint	field_no;

	n_uniq = dict_index_get_n_unique(index);

	cmp_dtuple_rec_with_match(entry, rec, &n_matched_fields,
						&n_matched_bytes);

	if (n_matched_fields < n_uniq) {
		/* The unique part of the key differs: no conflict */

		return(FALSE);
	}

	/* In a unique secondary index we allow equal key values if they
	contain SQL NULLs */

	if (!(index->type & DICT_CLUSTERED)) {

		field_no = 0;

		while (field_no < n_uniq) {
			if (dfield_get_len(dtuple_get_nth_field(entry,
								field_no))
			    == UNIV_SQL_NULL) {

				return(FALSE);
			}

			field_no++;
		}
	}

	/* A delete-marked record does not count as a duplicate */

	return(rec_get_deleted_flag(rec) ? FALSE : TRUE);
}
/*******************************************************************
Scans a unique non-clustered index at a given index entry to determine
whether a uniqueness violation has occurred for the key value of the entry.
......@@ -976,9 +1019,10 @@ row_ins_scan_sec_index_for_duplicate(
goto next_rec;
}
/* Try to place a lock on the index record */
/* Try to place a lock on the index record */
err = row_ins_set_shared_rec_lock(rec, index, thr);
err = row_ins_set_shared_rec_lock(LOCK_ORDINARY, rec, index,
thr);
if (err != DB_SUCCESS) {
......@@ -1082,8 +1126,8 @@ row_ins_duplicate_error_in_clust(
sure that in roll-forward we get the same duplicate
errors as in original execution */
err = row_ins_set_shared_rec_lock(rec, cursor->index,
thr);
err = row_ins_set_shared_rec_lock(LOCK_REC_NOT_GAP,
rec, cursor->index, thr);
if (err != DB_SUCCESS) {
return(err);
......@@ -1105,8 +1149,8 @@ row_ins_duplicate_error_in_clust(
if (rec != page_get_supremum_rec(page)) {
err = row_ins_set_shared_rec_lock(rec, cursor->index,
thr);
err = row_ins_set_shared_rec_lock(LOCK_REC_NOT_GAP,
rec, cursor->index, thr);
if (err != DB_SUCCESS) {
return(err);
......
......@@ -27,6 +27,7 @@ Created 9/17/2000 Heikki Tuuri
#include "lock0lock.h"
#include "rem0cmp.h"
#include "log0log.h"
#include "btr0sea.h"
/* A dummy variable used to fool the compiler */
ibool row_mysql_identically_false = FALSE;
......@@ -203,7 +204,6 @@ row_mysql_handle_errors(
que_thr_t* thr, /* in: query thread */
trx_savept_t* savept) /* in: savepoint or NULL */
{
ibool timeout_expired;
ulint err;
handle_new_error:
......@@ -240,11 +240,9 @@ row_mysql_handle_errors(
/* MySQL will roll back the latest SQL statement */
} else if (err == DB_LOCK_WAIT) {
timeout_expired = srv_suspend_mysql_thread(thr);
if (timeout_expired) {
trx->error_state = DB_LOCK_WAIT_TIMEOUT;
srv_suspend_mysql_thread(thr);
if (trx->error_state != DB_SUCCESS) {
que_thr_stop_for_mysql(thr);
goto handle_new_error;
......@@ -1146,7 +1144,7 @@ row_mysql_lock_data_dictionary(void)
/* Serialize data dictionary operations with dictionary mutex:
no deadlocks or lock waits can occur then in these operations */
rw_lock_x_lock(&(dict_foreign_key_check_lock));
rw_lock_x_lock(&dict_operation_lock);
mutex_enter(&(dict_sys->mutex));
}
......@@ -1161,7 +1159,7 @@ row_mysql_unlock_data_dictionary(void)
no deadlocks can occur then in these operations */
mutex_exit(&(dict_sys->mutex));
rw_lock_x_unlock(&(dict_foreign_key_check_lock));
rw_lock_x_unlock(&dict_operation_lock);
}
/*************************************************************************
......@@ -1184,6 +1182,7 @@ row_create_table_for_mysql(
ulint err;
ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
ut_ad(mutex_own(&(dict_sys->mutex)));
if (srv_created_new_raw) {
......@@ -1383,7 +1382,8 @@ row_create_index_for_mysql(
ulint namelen;
ulint keywordlen;
ulint err;
ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
ut_ad(mutex_own(&(dict_sys->mutex)));
ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
......@@ -1464,6 +1464,7 @@ row_table_add_foreign_constraints(
ulint err;
ut_ad(mutex_own(&(dict_sys->mutex)));
ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
ut_a(sql_string);
trx->op_info = (char *) "adding foreign keys";
......@@ -1846,12 +1847,16 @@ row_drop_table_for_mysql(
no deadlocks can occur then in these operations */
if (!has_dict_mutex) {
/* Prevent foreign key checks while we are dropping the table */
rw_lock_x_lock(&(dict_foreign_key_check_lock));
/* Prevent foreign key checks etc. while we are dropping the
table */
rw_lock_x_lock(&dict_operation_lock);
mutex_enter(&(dict_sys->mutex));
}
ut_ad(mutex_own(&(dict_sys->mutex)));
ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
graph = pars_sql(buf);
ut_a(graph);
......@@ -1861,9 +1866,6 @@ row_drop_table_for_mysql(
graph->fork_type = QUE_FORK_MYSQL_INTERFACE;
/* Prevent purge from running while we are dropping the table */
rw_lock_s_lock(&(purge_sys->purge_is_running));
table = dict_table_get_low(name);
if (!table) {
......@@ -1944,12 +1946,11 @@ row_drop_table_for_mysql(
}
}
funct_exit:
rw_lock_s_unlock(&(purge_sys->purge_is_running));
funct_exit:
if (!has_dict_mutex) {
mutex_exit(&(dict_sys->mutex));
rw_lock_x_unlock(&(dict_foreign_key_check_lock));
rw_lock_x_unlock(&dict_operation_lock);
}
que_graph_free(graph);
......@@ -1985,7 +1986,7 @@ row_drop_database_for_mysql(
trx_start_if_not_started(trx);
loop:
rw_lock_x_lock(&(dict_foreign_key_check_lock));
rw_lock_x_lock(&dict_operation_lock);
mutex_enter(&(dict_sys->mutex));
while ((table_name = dict_get_first_table_name_in_db(name))) {
......@@ -2000,7 +2001,7 @@ row_drop_database_for_mysql(
if (table->n_mysql_handles_opened > 0) {
mutex_exit(&(dict_sys->mutex));
rw_lock_x_unlock(&(dict_foreign_key_check_lock));
rw_lock_x_unlock(&dict_operation_lock);
ut_print_timestamp(stderr);
fprintf(stderr,
......@@ -2028,7 +2029,7 @@ row_drop_database_for_mysql(
}
mutex_exit(&(dict_sys->mutex));
rw_lock_x_unlock(&(dict_foreign_key_check_lock));
rw_lock_x_unlock(&dict_operation_lock);
trx_commit_for_mysql(trx);
......@@ -2165,7 +2166,7 @@ row_rename_table_for_mysql(
/* Serialize data dictionary operations with dictionary mutex:
no deadlocks can occur then in these operations */
rw_lock_x_lock(&(dict_foreign_key_check_lock));
rw_lock_x_lock(&dict_operation_lock);
mutex_enter(&(dict_sys->mutex));
table = dict_table_get_low(old_name);
......@@ -2249,7 +2250,7 @@ row_rename_table_for_mysql(
}
funct_exit:
mutex_exit(&(dict_sys->mutex));
rw_lock_x_unlock(&(dict_foreign_key_check_lock));
rw_lock_x_unlock(&dict_operation_lock);
que_graph_free(graph);
......@@ -2394,18 +2395,28 @@ row_check_table_for_mysql(
row_prebuilt_t* prebuilt) /* in: prebuilt struct in MySQL
handle */
{
dict_table_t* table = prebuilt->table;
dict_table_t* table = prebuilt->table;
dict_index_t* index;
ulint n_rows;
ulint n_rows_in_table = ULINT_UNDEFINED;
ulint ret = DB_SUCCESS;
ulint ret = DB_SUCCESS;
ulint old_isolation_level;
prebuilt->trx->op_info = (char *) "checking table";
old_isolation_level = prebuilt->trx->isolation_level;
/* We must run the index record counts at an isolation level
>= READ COMMITTED, because a dirty read can see a wrong number
of records in some index; to play safe, we use always
REPEATABLE READ here */
prebuilt->trx->isolation_level = TRX_ISO_REPEATABLE_READ;
index = dict_table_get_first_index(table);
while (index != NULL) {
/* fprintf(stderr, "Validating index %s\n", index->name); */
/* fprintf(stderr, "Validating index %s\n", index->name); */
if (!btr_validate_tree(index->tree)) {
ret = DB_ERROR;
......@@ -2433,6 +2444,9 @@ row_check_table_for_mysql(
index = dict_table_get_next_index(index);
}
/* Restore the original isolation level */
prebuilt->trx->isolation_level = old_isolation_level;
/* We validate also the whole adaptive hash index for all tables
at every CHECK TABLE */
......
......@@ -453,7 +453,9 @@ static
ibool
row_purge_parse_undo_rec(
/*=====================*/
/* out: TRUE if purge operation required */
/* out: TRUE if purge operation required:
NOTE that then the CALLER must s-unlock
dict_operation_lock! */
purge_node_t* node, /* in: row undo node */
ibool* updated_extern,
/* out: TRUE if an externally stored field
......@@ -493,18 +495,20 @@ row_purge_parse_undo_rec(
return(FALSE);
}
/* Prevent DROP TABLE etc. from running when we are doing the purge
for this row */
rw_lock_s_lock(&dict_operation_lock);
mutex_enter(&(dict_sys->mutex));
node->table = dict_table_get_on_id_low(table_id, thr_get_trx(thr));
rw_lock_x_lock(&(purge_sys->purge_is_running));
mutex_exit(&(dict_sys->mutex));
if (node->table == NULL) {
/* The table has been dropped: no need to do purge */
rw_lock_x_unlock(&(purge_sys->purge_is_running));
rw_lock_s_unlock(&dict_operation_lock);
return(FALSE);
}
......@@ -514,7 +518,7 @@ row_purge_parse_undo_rec(
if (clust_index == NULL) {
/* The table was corrupt in the data dictionary */
rw_lock_x_unlock(&(purge_sys->purge_is_running));
rw_lock_s_unlock(&dict_operation_lock);
return(FALSE);
}
......@@ -573,6 +577,8 @@ row_purge(
} else {
purge_needed = row_purge_parse_undo_rec(node, &updated_extern,
thr);
/* If purge_needed == TRUE, we must also remember to unlock
dict_operation_lock! */
}
if (purge_needed) {
......@@ -594,7 +600,7 @@ row_purge(
btr_pcur_close(&(node->pcur));
}
rw_lock_x_unlock(&(purge_sys->purge_is_running));
rw_lock_s_unlock(&dict_operation_lock);
}
/* Do some cleanup */
......
This diff is collapsed.
......@@ -254,7 +254,8 @@ row_undo_ins_parse_undo_rec(
node->table = dict_table_get_on_id(table_id, node->trx);
if (node->table == NULL) {
return;
return;
}
clust_index = dict_table_get_first_index(node->table);
......@@ -281,7 +282,7 @@ row_undo_ins(
ut_ad(node && thr);
ut_ad(node->state == UNDO_NODE_INSERT);
row_undo_ins_parse_undo_rec(node, thr);
if (node->table == NULL) {
......@@ -292,6 +293,7 @@ row_undo_ins(
if (!found) {
trx_undo_rec_release(node->trx, node->undo_no);
return(DB_SUCCESS);
}
......
......@@ -211,7 +211,6 @@ row_undo(
if (node->state == UNDO_NODE_FETCH_NEXT) {
/* The call below also starts &mtr */
node->undo_rec = trx_roll_pop_top_rec_of_trx(trx,
trx->roll_limit,
&roll_ptr,
......@@ -254,6 +253,10 @@ row_undo(
}
}
/* Prevent DROP TABLE etc. while we are rolling back this row */
rw_lock_s_lock(&dict_operation_lock);
if (node->state == UNDO_NODE_INSERT) {
err = row_undo_ins(node, thr);
......@@ -264,6 +267,8 @@ row_undo(
err = row_undo_mod(node, thr);
}
rw_lock_s_unlock(&dict_operation_lock);
/* Do some cleanup */
btr_pcur_close(&(node->pcur));
......
......@@ -79,7 +79,7 @@ ibool
row_upd_index_is_referenced(
/*========================*/
/* out: TRUE if referenced; NOTE that since
we do not hold dict_foreign_key_check_lock
we do not hold dict_operation_lock
when leaving the function, it may be that
the referencing table has been dropped when
we leave this function: this function is only
......@@ -95,8 +95,8 @@ row_upd_index_is_referenced(
return(FALSE);
}
if (!trx->has_dict_foreign_key_check_lock) {
rw_lock_s_lock(&dict_foreign_key_check_lock);
if (!trx->has_dict_operation_lock) {
rw_lock_s_lock(&dict_operation_lock);
}
foreign = UT_LIST_GET_FIRST(table->referenced_list);
......@@ -104,8 +104,8 @@ row_upd_index_is_referenced(
while (foreign) {
if (foreign->referenced_index == index) {
if (!trx->has_dict_foreign_key_check_lock) {
rw_lock_s_unlock(&dict_foreign_key_check_lock);
if (!trx->has_dict_operation_lock) {
rw_lock_s_unlock(&dict_operation_lock);
}
return(TRUE);
......@@ -114,8 +114,8 @@ row_upd_index_is_referenced(
foreign = UT_LIST_GET_NEXT(referenced_list, foreign);
}
if (!trx->has_dict_foreign_key_check_lock) {
rw_lock_s_unlock(&dict_foreign_key_check_lock);
if (!trx->has_dict_operation_lock) {
rw_lock_s_unlock(&dict_operation_lock);
}
return(FALSE);
......@@ -162,12 +162,12 @@ row_upd_check_references_constraints(
mtr_start(mtr);
if (!trx->has_dict_foreign_key_check_lock) {
if (!trx->has_dict_operation_lock) {
got_s_lock = TRUE;
rw_lock_s_lock(&dict_foreign_key_check_lock);
rw_lock_s_lock(&dict_operation_lock);
trx->has_dict_foreign_key_check_lock = TRUE;
trx->has_dict_operation_lock = TRUE;
}
foreign = UT_LIST_GET_FIRST(table->referenced_list);
......@@ -189,7 +189,7 @@ row_upd_check_references_constraints(
}
/* NOTE that if the thread ends up waiting for a lock
we will release dict_foreign_key_check_lock
we will release dict_operation_lock
temporarily! But the counter on the table
protects 'foreign' from being dropped while the check
is running. */
......@@ -212,8 +212,8 @@ row_upd_check_references_constraints(
if (err != DB_SUCCESS) {
if (got_s_lock) {
rw_lock_s_unlock(
&dict_foreign_key_check_lock);
trx->has_dict_foreign_key_check_lock
&dict_operation_lock);
trx->has_dict_operation_lock
= FALSE;
}
......@@ -227,8 +227,8 @@ row_upd_check_references_constraints(
}
if (got_s_lock) {
rw_lock_s_unlock(&dict_foreign_key_check_lock);
trx->has_dict_foreign_key_check_lock = FALSE;
rw_lock_s_unlock(&dict_operation_lock);
trx->has_dict_operation_lock = FALSE;
}
mem_heap_free(heap);
......
......@@ -135,8 +135,6 @@ byte srv_latin1_ordering[256] /* The sort order table of the latin1
, 0x44, 0x4E, 0x4F, 0x4F, 0x4F, 0x4F, 0x5D, 0xF7
, 0xD8, 0x55, 0x55, 0x55, 0x59, 0x59, 0xDE, 0xFF
};
ibool srv_use_native_aio = FALSE;
ulint srv_pool_size = ULINT_MAX; /* size in database pages;
MySQL originally sets this
......@@ -151,8 +149,9 @@ dulint srv_archive_recovery_limit_lsn;
ulint srv_lock_wait_timeout = 1024 * 1024 * 1024;
char* srv_unix_file_flush_method_str = NULL;
ulint srv_unix_file_flush_method = 0;
char* srv_file_flush_method_str = NULL;
ulint srv_unix_file_flush_method = SRV_UNIX_FDATASYNC;
ulint srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
/* If the following is != 0 we do not allow inserts etc. This protects
the user from forgetting the innodb_force_recovery keyword to my.cnf */
......@@ -281,6 +280,9 @@ time_t srv_last_monitor_time;
mutex_t srv_innodb_monitor_mutex;
ulint srv_main_thread_process_no = 0;
ulint srv_main_thread_id = 0;
/*
IMPLEMENTATION OF THE SERVER MAIN PROGRAM
=========================================
......@@ -2046,13 +2048,15 @@ srv_table_reserve_slot_for_mysql(void)
}
/*******************************************************************
Puts a MySQL OS thread to wait for a lock to be released. */
Puts a MySQL OS thread to wait for a lock to be released. If an error
occurs during the wait trx->error_state associated with thr is
!= DB_SUCCESS when we return. DB_LOCK_WAIT_TIMEOUT and DB_DEADLOCK
are possible errors. DB_DEADLOCK is returned if selective deadlock
resolution chose this transaction as a victim. */
ibool
void
srv_suspend_mysql_thread(
/*=====================*/
/* out: TRUE if the lock wait timeout was
exceeded */
que_thr_t* thr) /* in: query thread associated with the MySQL
OS thread */
{
......@@ -2069,13 +2073,15 @@ srv_suspend_mysql_thread(
mutex_enter(&kernel_mutex);
trx->error_state = DB_SUCCESS;
if (thr->state == QUE_THR_RUNNING) {
/* The lock has already been released: no need to suspend */
mutex_exit(&kernel_mutex);
return(FALSE);
return;
}
slot = srv_table_reserve_slot_for_mysql();
......@@ -2101,18 +2107,18 @@ srv_suspend_mysql_thread(
srv_conc_force_exit_innodb(thr_get_trx(thr));
/* Release possible foreign key check latch */
if (trx->has_dict_foreign_key_check_lock) {
if (trx->has_dict_operation_lock) {
rw_lock_s_unlock(&dict_foreign_key_check_lock);
rw_lock_s_unlock(&dict_operation_lock);
}
/* Wait for the release */
os_event_wait(event);
if (trx->has_dict_foreign_key_check_lock) {
if (trx->has_dict_operation_lock) {
rw_lock_s_lock(&dict_foreign_key_check_lock);
rw_lock_s_lock(&dict_operation_lock);
}
/* Return back inside InnoDB */
......@@ -2131,10 +2137,9 @@ srv_suspend_mysql_thread(
if (srv_lock_wait_timeout < 100000000 &&
wait_time > (double)srv_lock_wait_timeout) {
return(TRUE);
}
return(FALSE);
trx->error_state = DB_LOCK_WAIT_TIMEOUT;
}
}
/************************************************************************
......@@ -2300,9 +2305,19 @@ srv_sprintf_innodb_monitor(
"ROW OPERATIONS\n"
"--------------\n");
buf += sprintf(buf,
"%ld queries inside InnoDB, %ld queries in queue; main thread: %s\n",
srv_conc_n_threads, srv_conc_n_waiting_threads,
"%ld queries inside InnoDB, %ld queries in queue\n",
srv_conc_n_threads, srv_conc_n_waiting_threads);
#ifdef UNIV_LINUX
buf += sprintf(buf,
"Main thread process no %lu, state: %s\n",
srv_main_thread_process_no,
srv_main_thread_op_info);
#else
buf += sprintf(buf,
"Main thread id %lu, state: %s\n",
srv_main_thread_id,
srv_main_thread_op_info);
#endif
buf += sprintf(buf,
"Number of rows inserted %lu, updated %lu, deleted %lu, read %lu\n",
srv_n_rows_inserted,
......@@ -2636,6 +2651,9 @@ srv_master_thread(
UT_NOT_USED(arg);
srv_main_thread_process_no = os_proc_get_number();
srv_main_thread_id = os_thread_pf(os_thread_get_curr_id());
srv_table_reserve_slot(SRV_MASTER);
mutex_enter(&kernel_mutex);
......
......@@ -515,7 +515,7 @@ srv_calc_high32(
}
/*************************************************************************
Creates or opens the log files. */
Creates or opens the log files and closes them. */
static
ulint
open_or_create_log_file(
......@@ -640,7 +640,7 @@ open_or_create_log_file(
}
/*************************************************************************
Creates or opens database data files. */
Creates or opens database data files and closes them. */
static
ulint
open_or_create_data_files(
......@@ -965,31 +965,63 @@ innobase_start_or_create_for_mysql(void)
srv_is_being_started = TRUE;
srv_startup_is_before_trx_rollback_phase = TRUE;
os_aio_use_native_aio = FALSE;
#ifdef __WIN__
if (os_get_os_version() == OS_WIN95
|| os_get_os_version() == OS_WIN31
|| os_get_os_version() == OS_WINNT) {
/* On Win 95, 98, ME, Win32 subsystem for Windows 3.1,
and NT use simulated aio. In NT Windows provides async i/o,
but when run in conjunction with InnoDB Hot Backup, it seemed
to corrupt the data files. */
os_aio_use_native_aio = FALSE;
} else {
/* On Win 2000 and XP use async i/o */
os_aio_use_native_aio = TRUE;
}
#endif
if (srv_file_flush_method_str == NULL) {
/* These are the default options */
srv_unix_file_flush_method = SRV_UNIX_FDATASYNC;
if (0 == ut_strcmp(srv_unix_file_flush_method_str, "fdatasync")) {
srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
#ifndef __WIN__
} else if (0 == ut_strcmp(srv_file_flush_method_str, "fdatasync")) {
srv_unix_file_flush_method = SRV_UNIX_FDATASYNC;
} else if (0 == ut_strcmp(srv_unix_file_flush_method_str, "O_DSYNC")) {
} else if (0 == ut_strcmp(srv_file_flush_method_str, "O_DSYNC")) {
srv_unix_file_flush_method = SRV_UNIX_O_DSYNC;
} else if (0 == ut_strcmp(srv_unix_file_flush_method_str,
} else if (0 == ut_strcmp(srv_file_flush_method_str,
"littlesync")) {
srv_unix_file_flush_method = SRV_UNIX_LITTLESYNC;
} else if (0 == ut_strcmp(srv_unix_file_flush_method_str, "nosync")) {
} else if (0 == ut_strcmp(srv_file_flush_method_str, "nosync")) {
srv_unix_file_flush_method = SRV_UNIX_NOSYNC;
#else
} else if (0 == ut_strcmp(srv_file_flush_method_str, "normal")) {
srv_win_file_flush_method = SRV_WIN_IO_NORMAL;
os_aio_use_native_aio = FALSE;
} else if (0 == ut_strcmp(srv_file_flush_method_str, "unbuffered")) {
srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
os_aio_use_native_aio = FALSE;
} else if (0 == ut_strcmp(srv_file_flush_method_str,
"async_unbuffered")) {
srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
#endif
} else {
fprintf(stderr,
"InnoDB: Unrecognized value %s for innodb_flush_method\n",
srv_unix_file_flush_method_str);
srv_file_flush_method_str);
return(DB_ERROR);
}
/*
printf("srv_unix set to %lu\n", srv_unix_file_flush_method);
*/
os_aio_use_native_aio = srv_use_native_aio;
err = srv_boot();
if (err != DB_SUCCESS) {
......@@ -999,34 +1031,15 @@ innobase_start_or_create_for_mysql(void)
/* Restrict the maximum number of file i/o threads */
if (srv_n_file_io_threads > SRV_MAX_N_IO_THREADS) {
srv_n_file_io_threads = SRV_MAX_N_IO_THREADS;
}
#if !(defined(WIN_ASYNC_IO) || defined(POSIX_ASYNC_IO))
/* In simulated aio we currently have use only for 4 threads */
os_aio_use_native_aio = FALSE;
srv_n_file_io_threads = 4;
#endif
#ifdef __WIN__
if (os_get_os_version() == OS_WIN95
|| os_get_os_version() == OS_WIN31) {
if (!os_aio_use_native_aio) {
/* In simulated aio we currently have use only for 4 threads */
/* On Win 95, 98, ME, and Win32 subsystem for Windows 3.1 use
simulated aio */
srv_n_file_io_threads = 4;
os_aio_use_native_aio = FALSE;
srv_n_file_io_threads = 4;
} else {
/* On NT and Win 2000 always use aio */
os_aio_use_native_aio = TRUE;
}
#endif
os_aio_use_native_aio = FALSE;
if (!os_aio_use_native_aio) {
os_aio_init(8 * SRV_N_PENDING_IOS_PER_THREAD
* srv_n_file_io_threads,
srv_n_file_io_threads,
......@@ -1047,15 +1060,6 @@ innobase_start_or_create_for_mysql(void)
lock_sys_create(srv_lock_table_size);
#ifdef POSIX_ASYNC_IO
if (os_aio_use_native_aio) {
/* There is only one thread per async io array:
one for ibuf i/o, one for log i/o, one for ordinary reads,
one for ordinary writes; we need only 4 i/o threads */
srv_n_file_io_threads = 4;
}
#endif
/* Create i/o-handler threads: */
for (i = 0; i < srv_n_file_io_threads; i++) {
......
......@@ -663,7 +663,8 @@ rw_lock_own(
/*========*/
/* out: TRUE if locked */
rw_lock_t* lock, /* in: rw-lock */
ulint lock_type) /* in: lock type */
ulint lock_type) /* in: lock type: RW_LOCK_SHARED,
RW_LOCK_EX */
{
rw_lock_debug_t* info;
......
......@@ -901,8 +901,7 @@ sync_thread_levels_empty_gen(
if (slot->latch != NULL && (!dict_mutex_allowed ||
(slot->level != SYNC_DICT
&& slot->level != SYNC_FOREIGN_KEY_CHECK
&& slot->level != SYNC_PURGE_IS_RUNNING))) {
&& slot->level != SYNC_DICT_OPERATION))) {
lock = slot->latch;
mutex = slot->latch;
......@@ -1087,12 +1086,10 @@ sync_thread_add_level(
SYNC_IBUF_PESS_INSERT_MUTEX));
} else if (level == SYNC_DICT_AUTOINC_MUTEX) {
ut_a(sync_thread_levels_g(array, SYNC_DICT_AUTOINC_MUTEX));
} else if (level == SYNC_FOREIGN_KEY_CHECK) {
ut_a(sync_thread_levels_g(array, SYNC_FOREIGN_KEY_CHECK));
} else if (level == SYNC_DICT_OPERATION) {
ut_a(sync_thread_levels_g(array, SYNC_DICT_OPERATION));
} else if (level == SYNC_DICT_HEADER) {
ut_a(sync_thread_levels_g(array, SYNC_DICT_HEADER));
} else if (level == SYNC_PURGE_IS_RUNNING) {
ut_a(sync_thread_levels_g(array, SYNC_PURGE_IS_RUNNING));
} else if (level == SYNC_DICT) {
ut_a(buf_debug_prints
|| sync_thread_levels_g(array, SYNC_DICT));
......
......@@ -209,9 +209,6 @@ trx_purge_sys_create(void)
purge_sys->purge_undo_no = ut_dulint_zero;
purge_sys->next_stored = FALSE;
rw_lock_create(&(purge_sys->purge_is_running));
rw_lock_set_level(&(purge_sys->purge_is_running),
SYNC_PURGE_IS_RUNNING);
rw_lock_create(&(purge_sys->latch));
rw_lock_set_level(&(purge_sys->latch), SYNC_PURGE_LATCH);
......
......@@ -23,7 +23,7 @@ Created 3/26/1996 Heikki Tuuri
#include "srv0srv.h"
#include "thr0loc.h"
#include "btr0sea.h"
#include "os0proc.h"
/* Copy of the prototype for innobase_mysql_print_thd: this
copy MUST be equal to the one in mysql/sql/ha_innobase.cc ! */
......@@ -85,12 +85,14 @@ trx_create(
trx->conc_state = TRX_NOT_STARTED;
trx->start_time = time(NULL);
trx->isolation_level = TRX_ISO_REPEATABLE_READ;
trx->check_foreigns = TRUE;
trx->check_unique_secondary = TRUE;
trx->dict_operation = FALSE;
trx->mysql_thd = NULL;
trx->mysql_query_str = NULL;
trx->n_mysql_tables_in_use = 0;
trx->mysql_n_tables_locked = 0;
......@@ -132,7 +134,7 @@ trx_create(
trx->lock_heap = mem_heap_create_in_buffer(256);
UT_LIST_INIT(trx->trx_locks);
trx->has_dict_foreign_key_check_lock = FALSE;
trx->has_dict_operation_lock = FALSE;
trx->has_search_latch = FALSE;
trx->search_latch_timeout = BTR_SEA_TIMEOUT;
......@@ -175,6 +177,8 @@ trx_allocate_for_mysql(void)
mutex_exit(&kernel_mutex);
trx->mysql_thread_id = os_thread_get_curr_id();
trx->mysql_process_no = os_proc_get_number();
return(trx);
}
......@@ -1497,9 +1501,12 @@ trx_print(
default: buf += sprintf(buf, " state %lu", trx->conc_state);
}
#ifdef UNIV_LINUX
buf += sprintf(buf, ", process no %lu", trx->mysql_process_no);
#else
buf += sprintf(buf, ", OS thread id %lu",
os_thread_pf(trx->mysql_thread_id));
#endif
if (ut_strlen(trx->op_info) > 0) {
buf += sprintf(buf, " %s", trx->op_info);
}
......
......@@ -97,6 +97,8 @@ are determined in innobase_init below: */
char* innobase_data_home_dir = NULL;
char* innobase_log_group_home_dir = NULL;
char* innobase_log_arch_dir = NULL;
/* The following has a misleading name: starting from 4.0.5 this also
affects Windows */
char* innobase_unix_file_flush_method = NULL;
/* Below we have boolean-valued start-up parameters, and their default
......@@ -346,7 +348,8 @@ check_trx_exists(
trx = trx_allocate_for_mysql();
trx->mysql_thd = thd;
trx->mysql_query_str = &((*thd).query);
thd->transaction.all.innobase_tid = trx;
/* The execution of a single SQL statement is denoted by
......@@ -713,9 +716,10 @@ innobase_init(void)
DBUG_RETURN(TRUE);
}
srv_unix_file_flush_method_str = (innobase_unix_file_flush_method ?
srv_file_flush_method_str = (innobase_unix_file_flush_method ?
innobase_unix_file_flush_method :
(char*)"fdatasync");
NULL);
srv_n_log_groups = (ulint) innobase_mirrored_log_groups;
srv_n_log_files = (ulint) innobase_log_files_in_group;
......@@ -725,8 +729,6 @@ innobase_init(void)
srv_log_buffer_size = (ulint) innobase_log_buffer_size;
srv_flush_log_at_trx_commit = (ulint) innobase_flush_log_at_trx_commit;
srv_use_native_aio = 0;
srv_pool_size = (ulint) innobase_buffer_pool_size;
srv_mem_pool_size = (ulint) innobase_additional_mem_pool_size;
......@@ -2179,8 +2181,16 @@ convert_search_mode_to_innobase(
case HA_READ_AFTER_KEY: return(PAGE_CUR_G);
case HA_READ_BEFORE_KEY: return(PAGE_CUR_L);
case HA_READ_PREFIX: return(PAGE_CUR_GE);
case HA_READ_PREFIX_LAST: return(PAGE_CUR_LE);
/* HA_READ_PREFIX_LAST does not yet work in InnoDB! */
case HA_READ_PREFIX_LAST:
/* ut_print_timestamp(stderr);
fprintf(stderr,
" InnoDB: Warning: Using HA_READ_PREFIX_LAST\n"); */
return(PAGE_CUR_LE);
/* InnoDB does not yet support ..PREFIX_LAST!
We have to add a new search flag
PAGE_CUR_LE_OR_PREFIX to InnoDB. */
/* the above PREFIX flags mean that the last
field in the key value may just be a prefix
of the complete fixed length field */
......@@ -3639,7 +3649,6 @@ ha_innobase::reset(void)
return(0);
}
/**********************************************************************
When we create a temporary table inside MySQL LOCK TABLES, MySQL will
not call external_lock for the temporary table when it uses it. Instead,
......@@ -3661,6 +3670,14 @@ ha_innobase::start_stmt(
innobase_release_stat_resources(trx);
trx_mark_sql_stat_end(trx);
if (trx->isolation_level <= TRX_ISO_READ_COMMITTED
&& trx->read_view) {
/* At low transaction isolation levels we let
each consistent read set its own snapshot */
read_view_close_for_mysql(trx);
}
auto_inc_counter_for_this_stat = 0;
prebuilt->sql_stat_start = TRUE;
prebuilt->hint_no_need_to_fetch_extra_cols = TRUE;
......@@ -3680,6 +3697,24 @@ ha_innobase::start_stmt(
return(0);
}
/**********************************************************************
Translates a MySQL transaction isolation level code into the matching
InnoDB isolation level code. An unrecognized MySQL code trips the
ut_a(0) assertion. */
inline
ulint
innobase_map_isolation_level(
/*=========================*/
					/* out: InnoDB isolation level */
	enum_tx_isolation	iso)	/* in: MySQL isolation level code */
{
	ulint	innodb_level;

	switch (iso) {
	case ISO_READ_UNCOMMITTED:
		innodb_level = TRX_ISO_READ_UNCOMMITTED;
		break;
	case ISO_READ_COMMITTED:
		innodb_level = TRX_ISO_READ_COMMITTED;
		break;
	case ISO_REPEATABLE_READ:
		innodb_level = TRX_ISO_REPEATABLE_READ;
		break;
	case ISO_SERIALIZABLE:
		innodb_level = TRX_ISO_SERIALIZABLE;
		break;
	default:
		/* No other MySQL isolation code is expected here */
		ut_a(0);
		innodb_level = 0;
		break;
	}

	return(innodb_level);
}
/**********************************************************************
As MySQL will execute an external lock for every new table it uses when it
starts to process an SQL statement (an exception is when MySQL calls
......@@ -3726,7 +3761,13 @@ ha_innobase::external_lock(
thd->transaction.all.innodb_active_trans = 1;
trx->n_mysql_tables_in_use++;
if (thd->variables.tx_isolation == ISO_SERIALIZABLE
if (thd->variables.tx_isolation != ISO_REPEATABLE_READ) {
trx->isolation_level = innobase_map_isolation_level(
(enum_tx_isolation)
thd->variables.tx_isolation);
}
if (trx->isolation_level == TRX_ISO_SERIALIZABLE
&& prebuilt->select_lock_type == LOCK_NONE) {
/* To get serializable execution we let InnoDB
......@@ -3753,6 +3794,15 @@ ha_innobase::external_lock(
innobase_release_stat_resources(trx);
if (trx->isolation_level <= TRX_ISO_READ_COMMITTED
&& trx->read_view) {
/* At low transaction isolation levels we let
each consistent read set its own snapshot */
read_view_close_for_mysql(trx);
}
if (!(thd->options
& (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) {
......@@ -3777,14 +3827,13 @@ innodb_show_status(
char* buf;
DBUG_ENTER("innodb_show_status");
if (innodb_skip) {
fprintf(stderr,
"Cannot call SHOW INNODB STATUS because skip-innodb is defined\n");
fprintf(stderr,
"Cannot call SHOW INNODB STATUS because skip-innodb is defined\n");
DBUG_RETURN(-1);
}
DBUG_RETURN(-1);
}
/* We let the InnoDB Monitor output at most 100 kB of text, and add
a safety margin of 10 kB for buffer overruns */
......
......@@ -96,7 +96,7 @@ class ha_innobase: public handler
ulong index_flags(uint idx) const
{
return (HA_READ_NEXT | HA_READ_PREV | HA_READ_ORDER |
HA_KEY_READ_ONLY | HA_NOT_READ_PREFIX_LAST);
HA_KEY_READ_ONLY);
}
uint max_record_length() const { return HA_MAX_REC_LENGTH; }
uint max_keys() const { return MAX_KEY; }
......
......@@ -3879,7 +3879,7 @@ static void set_options(void)
/* Set default values for some variables */
global_system_variables.table_type=DB_TYPE_MYISAM;
global_system_variables.tx_isolation=ISO_READ_COMMITTED;
global_system_variables.tx_isolation=ISO_REPEATABLE_READ;
global_system_variables.select_limit= (ulong) HA_POS_ERROR;
max_system_variables.select_limit= (ulong) HA_POS_ERROR;
global_system_variables.max_join_size= (ulong) HA_POS_ERROR;
......@@ -4351,7 +4351,7 @@ get_one_option(int optid, const struct my_option *opt __attribute__((unused)),
}
global_system_variables.tx_isolation= ((opt_sql_mode & MODE_SERIALIZABLE) ?
ISO_SERIALIZABLE :
ISO_READ_COMMITTED);
ISO_REPEATABLE_READ);
break;
}
case OPT_MASTER_PASSWORD:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment