Commit 0e233879 authored by jyang's avatar jyang

Merge from mysql-trunk-innodb to local repository

parents e4a4ec39 0ecec12d
......@@ -85,7 +85,8 @@ SELECT table_schema, table_name, row_format
FROM information_schema.tables WHERE engine='innodb';
drop table t1,t2;
# The following should fail even in non-strict mode.
# The following should fail in non-strict mode too.
# (The fix of Bug #50945 only affects REDUNDANT and COMPACT tables.)
SET SESSION innodb_strict_mode = off;
--error ER_TOO_BIG_ROWSIZE
CREATE TABLE t1(
......
......@@ -24,6 +24,7 @@ SET GLOBAL innodb_file_per_table=ON;
# Generating 10 tables
# Creating a table with 94 columns and 24 indexes
DROP TABLE IF EXISTS `table0`;
set innodb_strict_mode=on;
--error ER_TOO_BIG_ROWSIZE
CREATE TABLE IF NOT EXISTS `table0`
(`col0` BOOL,
......
......@@ -232,7 +232,7 @@ SET(INNOBASE_SOURCES btr/btr0btr.c btr/btr0cur.c btr/btr0pcur.c btr/btr0sea.c
rem/rem0cmp.c rem/rem0rec.c
row/row0ext.c row/row0ins.c row/row0merge.c row/row0mysql.c row/row0purge.c row/row0row.c
row/row0sel.c row/row0uins.c row/row0umod.c row/row0undo.c row/row0upd.c row/row0vers.c
srv/srv0que.c srv/srv0srv.c srv/srv0start.c
srv/srv0srv.c srv/srv0start.c
sync/sync0arr.c sync/sync0rw.c sync/sync0sync.c
thr/thr0loc.c
trx/trx0i_s.c trx/trx0purge.c trx/trx0rec.c trx/trx0roll.c trx/trx0rseg.c
......
......@@ -174,7 +174,6 @@ noinst_HEADERS= \
include/row0upd.ic \
include/row0vers.h \
include/row0vers.ic \
include/srv0que.h \
include/srv0srv.h \
include/srv0srv.ic \
include/srv0start.h \
......@@ -299,7 +298,6 @@ libinnobase_a_SOURCES= \
row/row0undo.c \
row/row0upd.c \
row/row0vers.c \
srv/srv0que.c \
srv/srv0srv.c \
srv/srv0start.c \
sync/sync0arr.c \
......
......@@ -1455,11 +1455,11 @@ Calculates a split record such that the tuple will certainly fit on
its half-page when the split is performed. We assume in this function
only that the cursor page has at least one user record.
@return split record, or NULL if tuple will be the first record on
upper half-page */
the lower or upper half-page (determined by btr_page_tuple_smaller()) */
static
rec_t*
btr_page_get_sure_split_rec(
/*========================*/
btr_page_get_split_rec(
/*===================*/
btr_cur_t* cursor, /*!< in: cursor at which insert should be made */
const dtuple_t* tuple, /*!< in: tuple to insert */
ulint n_ext) /*!< in: number of externally stored columns */
......@@ -1835,6 +1835,37 @@ btr_attach_half_pages(
btr_page_set_next(upper_page, upper_page_zip, next_page_no, mtr);
}
/*************************************************************//**
Checks whether a tuple compares smaller than the first user record of
the page that the cursor is positioned on.
NOTE(review): this presumably requires the page to hold at least one
user record, and relies on page records being kept in ascending order
so that "smaller than the first" means "smaller than all" - confirm.
The offsets array returned by rec_get_offsets() is used only inside
this function; the caller's pointer is not updated.
@return TRUE if the tuple is smaller than the first user record */
static
ibool
btr_page_tuple_smaller(
/*===================*/
	btr_cur_t*	cursor,	/*!< in: b-tree cursor */
	const dtuple_t*	tuple,	/*!< in: tuple to consider */
	ulint*		offsets,/*!< in/out: temporary storage */
	ulint		n_uniq,	/*!< in: number of unique fields
				in the index page records */
	mem_heap_t**	heap)	/*!< in/out: heap for offsets */
{
	page_cur_t	page_cursor;
	const rec_t*	rec;

	/* Step from the position before the first record to the
	first user record of the cursor's page. */
	page_cur_set_before_first(btr_cur_get_block(cursor), &page_cursor);
	page_cur_move_to_next(&page_cursor);
	rec = page_cur_get_rec(&page_cursor);

	/* Compute the field offsets of that record so that it can be
	compared with the tuple. */
	offsets = rec_get_offsets(rec, cursor->index, offsets, n_uniq, heap);

	return(cmp_dtuple_rec(tuple, rec, offsets) < 0);
}
/*************************************************************//**
Splits an index page to halves and inserts the tuple. It is assumed
that mtr holds an x-latch to the index tree. NOTE: the tree x-latch is
......@@ -1909,49 +1940,45 @@ btr_page_split_and_insert(
if (n_iterations > 0) {
direction = FSP_UP;
hint_page_no = page_no + 1;
split_rec = btr_page_get_sure_split_rec(cursor, tuple, n_ext);
split_rec = btr_page_get_split_rec(cursor, tuple, n_ext);
if (UNIV_UNLIKELY(split_rec == NULL)) {
insert_left = btr_page_tuple_smaller(
cursor, tuple, offsets, n_uniq, &heap);
}
} else if (btr_page_get_split_rec_to_right(cursor, &split_rec)) {
direction = FSP_UP;
hint_page_no = page_no + 1;
insert_left = FALSE;
} else if (btr_page_get_split_rec_to_left(cursor, &split_rec)) {
direction = FSP_DOWN;
hint_page_no = page_no - 1;
ut_ad(split_rec);
} else {
direction = FSP_UP;
hint_page_no = page_no + 1;
if (page_get_n_recs(page) == 1) {
page_cur_t pcur;
/* There is only one record in the index page
therefore we can't split the node in the middle
by default. We need to determine whether the
new record will be inserted to the left or right. */
/* If there is only one record in the index page, we
can't split the node in the middle by default. We need
to determine whether the new record will be inserted
to the left or right. */
/* Read the first (and only) record in the page. */
page_cur_set_before_first(block, &pcur);
page_cur_move_to_next(&pcur);
first_rec = page_cur_get_rec(&pcur);
offsets = rec_get_offsets(
first_rec, cursor->index, offsets,
n_uniq, &heap);
/* If the new record is less than the existing record
the split in the middle will copy the existing
record to the new node. */
if (cmp_dtuple_rec(tuple, first_rec, offsets) < 0) {
split_rec = page_get_middle_rec(page);
} else {
split_rec = NULL;
}
} else {
if (page_get_n_recs(page) > 1) {
split_rec = page_get_middle_rec(page);
} else if (btr_page_tuple_smaller(cursor, tuple,
offsets, n_uniq, &heap)) {
split_rec = page_rec_get_next(
page_get_infimum_rec(page));
} else {
split_rec = NULL;
insert_left = FALSE;
}
}
/* At this point, insert_left is initialized if split_rec == NULL
and may be uninitialized otherwise. */
/* 2. Allocate a new page to the index */
new_block = btr_page_alloc(cursor->index, hint_page_no, direction,
btr_page_get_level(page, mtr), mtr);
......@@ -1978,11 +2005,11 @@ btr_page_split_and_insert(
avoid further splits by inserting the record
to an empty page. */
split_rec = NULL;
goto insert_right;
goto insert_empty;
}
} else {
insert_right:
insert_left = FALSE;
insert_empty:
ut_ad(!split_rec);
buf = mem_alloc(rec_get_converted_size(cursor->index,
tuple, n_ext));
......@@ -2019,7 +2046,17 @@ btr_page_split_and_insert(
}
/* 5. Move then the records to the new page */
if (direction == FSP_DOWN) {
if (direction == FSP_DOWN
#ifdef UNIV_BTR_AVOID_COPY
&& page_rec_is_supremum(move_limit)) {
/* Instead of moving all records, make the new page
the empty page. */
left_block = block;
right_block = new_block;
} else if (direction == FSP_DOWN
#endif /* UNIV_BTR_AVOID_COPY */
) {
/* fputs("Split left\n", stderr); */
if (0
......@@ -2062,6 +2099,14 @@ btr_page_split_and_insert(
right_block = block;
lock_update_split_left(right_block, left_block);
#ifdef UNIV_BTR_AVOID_COPY
} else if (!split_rec) {
/* Instead of moving all records, make the new page
the empty page. */
left_block = new_block;
right_block = block;
#endif /* UNIV_BTR_AVOID_COPY */
} else {
/* fputs("Split right\n", stderr); */
......
......@@ -1184,7 +1184,6 @@ btr_cur_optimistic_insert(
ibool inherit;
ulint zip_size;
ulint rec_size;
mem_heap_t* heap = NULL;
ulint err;
*big_rec = NULL;
......@@ -1264,10 +1263,6 @@ btr_cur_optimistic_insert(
index, entry, big_rec_vec);
}
if (heap) {
mem_heap_free(heap);
}
return(DB_TOO_BIG_RECORD);
}
}
......@@ -1290,15 +1285,11 @@ btr_cur_optimistic_insert(
dtuple_convert_back_big_rec(index, entry, big_rec_vec);
}
if (UNIV_LIKELY_NULL(heap)) {
mem_heap_free(heap);
}
return(err);
}
if (UNIV_UNLIKELY(max_size < BTR_CUR_PAGE_REORGANIZE_LIMIT
|| max_size < rec_size)
|| max_size < rec_size)
&& UNIV_LIKELY(page_get_n_recs(page) > 1)
&& page_get_max_insert_size(page, 1) < rec_size) {
......@@ -1364,10 +1355,6 @@ btr_cur_optimistic_insert(
}
}
if (UNIV_LIKELY_NULL(heap)) {
mem_heap_free(heap);
}
#ifdef BTR_CUR_HASH_ADAPT
if (!reorg && leaf && (cursor->flag == BTR_CUR_HASH)) {
btr_search_update_hash_node_on_insert(cursor);
......
......@@ -666,6 +666,21 @@ dtuple_convert_big_rec(
goto skip_field;
}
/* In DYNAMIC and COMPRESSED format, store
locally any non-BLOB columns whose maximum
length does not exceed 256 bytes. This is
because there is no room for the "external
storage" flag when the maximum length is 255
bytes or less. This restriction trivially
holds in REDUNDANT and COMPACT format, because
there we always store locally columns whose
length is up to local_len == 788 bytes.
@see rec_init_offsets_comp_ordinary */
if (ifield->col->mtype != DATA_BLOB
&& ifield->col->len < 256) {
goto skip_field;
}
longest_i = i;
longest = savings;
......
......@@ -368,8 +368,8 @@ dict_boot(void)
#if DICT_SYS_INDEXES_TYPE_FIELD != 4 + 2
#error "DICT_SYS_INDEXES_TYPE_FIELD != 4 + 2"
#endif
#if DICT_SYS_INDEXES_NAME_FIELD != 1 + 2
#error "DICT_SYS_INDEXES_NAME_FIELD != 1 + 2"
#if DICT_SYS_INDEXES_NAME_FIELD != 2 + 2
#error "DICT_SYS_INDEXES_NAME_FIELD != 2 + 2"
#endif
table->id = DICT_INDEXES_ID;
......
......@@ -1105,8 +1105,11 @@ dict_create_index_step(
dulint index_id = node->index->id;
err = dict_index_add_to_cache(node->table, node->index,
FIL_NULL, TRUE);
err = dict_index_add_to_cache(
node->table, node->index, FIL_NULL,
trx_is_strict(trx)
|| dict_table_get_format(node->table)
>= DICT_TF_FORMAT_ZIP);
node->index = dict_index_get_if_in_cache_low(index_id);
ut_a(!node->index == (err != DB_SUCCESS));
......
......@@ -1971,6 +1971,19 @@ trx_is_interrupted(
return(trx && trx->mysql_thd && thd_killed((THD*) trx->mysql_thd));
}
/**********************************************************************//**
Tells whether the MySQL session attached to a transaction has the
strict_mode session variable set.
@return TRUE if trx is not NULL, has a MySQL thread handle, and
strict_mode is set for that handle; FALSE otherwise */
extern "C" UNIV_INTERN
ibool
trx_is_strict(
/*==========*/
	trx_t*	trx)	/*!< in: transaction, may be NULL */
{
	if (trx == NULL || trx->mysql_thd == NULL) {
		/* No transaction, or no MySQL session to consult. */
		return(FALSE);
	}

	return(THDVAR((THD*) trx->mysql_thd, strict_mode) ? TRUE : FALSE);
}
/**************************************************************//**
Resets some fields of a prebuilt struct. The template is used in fast
retrieval of just those column values MySQL needs in its processing. */
......@@ -2283,7 +2296,7 @@ innobase_init(
}
sql_print_error("InnoDB: invalid value "
"innodb_file_format_check=%s",
"innodb_change_buffering=%s",
innobase_change_buffering);
goto mem_free_and_error;
}
......
......@@ -922,9 +922,8 @@ ha_innobase::add_index(
trx_commit_for_mysql(prebuilt->trx);
}
ut_d(dict_table_check_for_dup_indexes(innodb_table, FALSE));
if (dict_locked) {
ut_d(dict_table_check_for_dup_indexes(innodb_table, FALSE));
row_mysql_unlock_data_dictionary(trx);
}
......
......@@ -137,7 +137,7 @@ clustered index */
#define DICT_SYS_INDEXES_PAGE_NO_FIELD 8
#define DICT_SYS_INDEXES_SPACE_NO_FIELD 7
#define DICT_SYS_INDEXES_TYPE_FIELD 6
#define DICT_SYS_INDEXES_NAME_FIELD 3
#define DICT_SYS_INDEXES_NAME_FIELD 4
/* When a row id which is zero modulo this number (which must be a power of
two) is assigned, the field DICT_HDR_ROW_ID on the dictionary header page is
......
......@@ -381,6 +381,9 @@ struct que_thr_struct{
thus far */
ulint lock_state; /*!< lock state of thread (table or
row) */
struct srv_slot_struct*
slot; /* The thread slot in the wait
array in srv_sys_t */
};
#define QUE_THR_MAGIC_N 8476583
......
/*****************************************************************************
Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/**************************************************//**
@file include/srv0que.h
Server query execution
Created 6/5/1996 Heikki Tuuri
*******************************************************/
#ifndef srv0que_h
#define srv0que_h
#include "univ.i"
#include "que0types.h"
/**********************************************************************//**
Enqueues a task to server task queue and releases a worker thread, if there
is a suspended one. */
UNIV_INTERN
void
srv_que_task_enqueue_low(
/*=====================*/
que_thr_t* thr); /*!< in: query thread */
#endif
......@@ -239,7 +239,6 @@ extern ibool srv_print_latch_waits;
# define srv_print_latch_waits FALSE
#endif /* UNIV_DEBUG */
extern ulint srv_activity_count;
extern ulint srv_fatal_semaphore_wait_threshold;
extern ulint srv_dml_needed_delay;
......@@ -314,12 +313,6 @@ typedef struct export_var_struct export_struc;
/** Status variables to be passed to MySQL */
extern export_struc export_vars;
/** The server system */
typedef struct srv_sys_struct srv_sys_t;
/** The server system */
extern srv_sys_t* srv_sys;
# ifdef UNIV_PFS_THREAD
/* Keys to register InnoDB threads with performance schema */
extern mysql_pfs_key_t trx_rollback_clean_thread_key;
......@@ -421,6 +414,8 @@ enum srv_thread_type {
be biggest) */
};
struct srv_slot_struct;
/*********************************************************************//**
Boots Innobase server.
@return DB_SUCCESS or error code */
......@@ -471,17 +466,6 @@ srv_set_io_thread_op_info(
const char* str); /*!< in: constant char string describing the
state */
/*********************************************************************//**
Releases threads of the type given from suspension in the thread table.
NOTE! The server mutex has to be reserved by the caller!
@return number of threads released: this may be less than n if not
enough threads were suspended at the moment */
UNIV_INTERN
ulint
srv_release_threads(
/*================*/
enum srv_thread_type type, /*!< in: thread type */
ulint n); /*!< in: number of threads to release */
/*********************************************************************//**
The master thread controlling the server.
@return a dummy parameter */
UNIV_INTERN
......@@ -628,6 +612,13 @@ void
srv_export_innodb_status(void);
/*==========================*/
/******************************************************************//**
Increment the server activity counter. */
UNIV_INTERN
void
srv_inc_activity_count(void);
/*=========================*/
/*********************************************************************//**
Asynchronous purge thread.
@return a dummy parameter */
......@@ -637,11 +628,23 @@ srv_purge_thread(
/*=============*/
void* arg __attribute__((unused))); /*!< in: a dummy parameter
required by os_thread_create */
/** Thread slot in the thread table */
typedef struct srv_slot_struct srv_slot_t;
/** Thread table is an array of slots */
typedef srv_slot_t srv_table_t;
/**********************************************************************//**
Enqueues a task to server task queue and releases a worker thread, if there
is a suspended one. */
UNIV_INTERN
void
srv_que_task_enqueue_low(
/*=====================*/
que_thr_t* thr); /*!< in: query thread */
/**********************************************************************//**
Check whether the master thread is active.
@return FALSE if it is not active. */
UNIV_INTERN
ibool
srv_is_master_thread_active(void);
/*==============================*/
/** Status variables to be passed to MySQL */
struct export_var_struct{
......@@ -697,14 +700,6 @@ struct export_var_struct{
ulint innodb_rows_deleted; /*!< srv_n_rows_deleted */
};
/** The server system struct */
struct srv_sys_struct{
srv_table_t* threads; /*!< server thread table */
UT_LIST_BASE_NODE_T(que_thr_t)
tasks; /*!< task queue */
};
extern ulint srv_n_threads_active[];
#else /* !UNIV_HOTBACKUP */
# define srv_use_adaptive_hash_indexes FALSE
# define srv_use_checksums TRUE
......
......@@ -105,6 +105,7 @@ extern mysql_pfs_key_t rw_lock_mutex_key;
extern mysql_pfs_key_t srv_dict_tmpfile_mutex_key;
extern mysql_pfs_key_t srv_innodb_monitor_mutex_key;
extern mysql_pfs_key_t srv_misc_tmpfile_mutex_key;
extern mysql_pfs_key_t srv_threads_mutex_key;
extern mysql_pfs_key_t srv_monitor_file_mutex_key;
extern mysql_pfs_key_t syn_arr_mutex_key;
# ifdef UNIV_SYNC_DEBUG
......@@ -587,6 +588,9 @@ Kernel mutex If a kernel operation needs a file
| fsp x-latch before acquiring the kernel
| mutex.
V
Threads mutex Thread scheduling mutex
|
V
Search system mutex
|
V
......@@ -657,8 +661,9 @@ or row lock! */
/*------------------------------------- MySQL binlog mutex */
/*-------------------------------*/
#define SYNC_KERNEL 300
#define SYNC_REC_LOCK 299
#define SYNC_TRX_LOCK_HEAP 298
#define SYNC_THREADS 299
#define SYNC_REC_LOCK 298
#define SYNC_TRX_LOCK_HEAP 297
#define SYNC_TRX_SYS_HEADER 290
#define SYNC_LOG 170
#define SYNC_LOG_FLUSH_ORDER 147
......
......@@ -391,6 +391,14 @@ ibool
trx_is_interrupted(
/*===============*/
trx_t* trx); /*!< in: transaction */
/**********************************************************************//**
Determines if the currently running transaction is in strict mode.
@return TRUE if strict */
UNIV_INTERN
ibool
trx_is_strict(
/*==========*/
trx_t* trx); /*!< in: transaction */
#else /* !UNIV_HOTBACKUP */
#define trx_is_interrupted(trx) FALSE
#endif /* !UNIV_HOTBACKUP */
......
......@@ -46,7 +46,7 @@ Created 1/20/1994 Heikki Tuuri
#define INNODB_VERSION_MAJOR 1
#define INNODB_VERSION_MINOR 1
#define INNODB_VERSION_BUGFIX 0
#define INNODB_VERSION_BUGFIX 1
/* The following is the InnoDB version as shown in
SELECT plugin_version FROM information_schema.plugins;
......@@ -115,7 +115,7 @@ if we are compiling on Windows. */
/* Include <sys/stat.h> to get S_I... macros defined for os0file.c */
# include <sys/stat.h>
# if !defined(__NETWARE__) && !defined(__WIN__)
# if !defined(__NETWARE__) && !defined(__WIN__)
# include <sys/mman.h> /* mmap() for os0proc.c */
# endif
......@@ -182,6 +182,9 @@ command. Not tested on Windows. */
#define UNIV_COMPILE_TEST_FUNCS
*/
#ifdef HAVE_purify
# define UNIV_DEBUG_VALGRIND
#endif /* HAVE_purify */
#if 0
#define UNIV_DEBUG_VALGRIND /* Enable extra
Valgrind instrumentation */
......@@ -219,6 +222,10 @@ operations (very slow); also UNIV_DEBUG must be defined */
adaptive hash index */
#define UNIV_SRV_PRINT_LATCH_WAITS /* enable diagnostic output
in sync0sync.c */
#define UNIV_BTR_AVOID_COPY /* when splitting B-tree nodes,
do not move any records when
all the records would
be moved */
#define UNIV_BTR_PRINT /* enable functions for
printing B-trees */
#define UNIV_ZIP_DEBUG /* extensive consistency checks
......
......@@ -3131,17 +3131,14 @@ logs_empty_and_mark_files_at_shutdown(void)
return; /* We SKIP ALL THE REST !! */
}
/* Check that the master thread is suspended */
if (srv_n_threads_active[SRV_MASTER] != 0) {
mutex_exit(&kernel_mutex);
mutex_exit(&kernel_mutex);
/* Check that the master thread is suspended */
if (srv_is_master_thread_active()) {
goto loop;
}
mutex_exit(&kernel_mutex);
mutex_enter(&(log_sys->mutex));
if (log_sys->n_pending_checkpoint_writes
......@@ -3199,18 +3196,14 @@ logs_empty_and_mark_files_at_shutdown(void)
mutex_exit(&(log_sys->mutex));
mutex_enter(&kernel_mutex);
/* Check that the master thread has stayed suspended */
if (srv_n_threads_active[SRV_MASTER] != 0) {
if (srv_is_master_thread_active()) {
fprintf(stderr,
"InnoDB: Warning: the master thread woke up"
" during shutdown\n");
mutex_exit(&kernel_mutex);
goto loop;
}
mutex_exit(&kernel_mutex);
fil_flush_file_spaces(FIL_TABLESPACE);
fil_flush_file_spaces(FIL_LOG);
......@@ -3228,7 +3221,8 @@ logs_empty_and_mark_files_at_shutdown(void)
srv_shutdown_state = SRV_SHUTDOWN_LAST_PHASE;
/* Make some checks that the server really is quiet */
ut_a(srv_n_threads_active[SRV_MASTER] == 0);
ut_a(!srv_is_master_thread_active());
ut_a(buf_all_freed());
ut_a(lsn == log_sys->lsn);
......@@ -3249,7 +3243,8 @@ logs_empty_and_mark_files_at_shutdown(void)
fil_close_all_files();
/* Make some checks that the server really is quiet */
ut_a(srv_n_threads_active[SRV_MASTER] == 0);
ut_a(!srv_is_master_thread_active());
ut_a(buf_all_freed());
ut_a(lsn == log_sys->lsn);
}
......
......@@ -29,7 +29,6 @@ Created 5/27/1996 Heikki Tuuri
#include "que0que.ic"
#endif
#include "srv0que.h"
#include "usr0sess.h"
#include "trx0trx.h"
#include "trx0roll.h"
......@@ -311,7 +310,9 @@ que_thr_end_wait_no_next_thr(
/* In MySQL we let the OS thread (not just the query thread) to wait
for the lock to be released: */
srv_release_mysql_thread_if_suspended(thr);
if (thr != NULL) {
srv_release_mysql_thread_if_suspended(thr);
}
/* srv_que_task_enqueue_low(thr); */
}
......
......@@ -212,6 +212,13 @@ rec_get_n_extern_new(
const dict_col_t* col
= dict_field_get_col(field);
len = *lens--;
/* If the maximum length of the field is up
to 255 bytes, the actual length is always
stored in one byte. If the maximum length is
more than 255 bytes, the actual length is
stored in one byte for 0..127. The length
will be encoded in two bytes when it is 128 or
more, or when the field is stored externally. */
if (UNIV_UNLIKELY(col->len > 255)
|| UNIV_UNLIKELY(col->mtype == DATA_BLOB)) {
if (len & 0x80) {
......@@ -294,6 +301,13 @@ rec_init_offsets_comp_ordinary(
const dict_col_t* col
= dict_field_get_col(field);
len = *lens--;
/* If the maximum length of the field is up
to 255 bytes, the actual length is always
stored in one byte. If the maximum length is
more than 255 bytes, the actual length is
stored in one byte for 0..127. The length
will be encoded in two bytes when it is 128 or
more, or when the field is stored externally. */
if (UNIV_UNLIKELY(col->len > 255)
|| UNIV_UNLIKELY(col->mtype
== DATA_BLOB)) {
......@@ -425,6 +439,15 @@ rec_init_offsets(
const dict_col_t* col
= dict_field_get_col(field);
len = *lens--;
/* If the maximum length of the field
is up to 255 bytes, the actual length
is always stored in one byte. If the
maximum length is more than 255 bytes,
the actual length is stored in one
byte for 0..127. The length will be
encoded in two bytes when it is 128 or
more, or when the field is stored
externally. */
if (UNIV_UNLIKELY(col->len > 255)
|| UNIV_UNLIKELY(col->mtype
== DATA_BLOB)) {
......@@ -647,6 +670,13 @@ rec_get_offsets_reverse(
const dict_col_t* col
= dict_field_get_col(field);
len = *lens++;
/* If the maximum length of the field is up
to 255 bytes, the actual length is always
stored in one byte. If the maximum length is
more than 255 bytes, the actual length is
stored in one byte for 0..127. The length
will be encoded in two bytes when it is 128 or
more, or when the field is stored externally. */
if (UNIV_UNLIKELY(col->len > 255)
|| UNIV_UNLIKELY(col->mtype == DATA_BLOB)) {
if (len & 0x80) {
......@@ -781,12 +811,20 @@ rec_get_converted_size_comp_prefix(
ut_ad(len <= col->len || col->mtype == DATA_BLOB);
/* If the maximum length of a variable-length field
is up to 255 bytes, the actual length is always stored
in one byte. If the maximum length is more than 255
bytes, the actual length is stored in one byte for
0..127. The length will be encoded in two bytes when
it is 128 or more, or when the field is stored externally. */
if (field->fixed_len) {
ut_ad(len == field->fixed_len);
/* dict_index_add_col() should guarantee this */
ut_ad(!field->prefix_len
|| field->fixed_len == field->prefix_len);
} else if (dfield_is_ext(&fields[i])) {
ut_ad(col->len >= 256 || col->mtype == DATA_BLOB);
extra_size += 2;
} else if (len < 128
|| (col->len < 256 && col->mtype != DATA_BLOB)) {
......@@ -1086,6 +1124,8 @@ rec_convert_dtuple_to_rec_comp(
/* Store the data and the offsets */
for (i = 0, field = fields; i < n_fields; i++, field++) {
const dict_field_t* ifield;
type = dfield_get_type(field);
len = dfield_get_len(field);
......@@ -1120,12 +1160,20 @@ rec_convert_dtuple_to_rec_comp(
/* only nullable fields can be null */
ut_ad(!dfield_is_null(field));
fixed_len = dict_index_get_nth_field(index, i)->fixed_len;
ifield = dict_index_get_nth_field(index, i);
fixed_len = ifield->fixed_len;
/* If the maximum length of a variable-length field
is up to 255 bytes, the actual length is always stored
in one byte. If the maximum length is more than 255
bytes, the actual length is stored in one byte for
0..127. The length will be encoded in two bytes when
it is 128 or more, or when the field is stored externally. */
if (fixed_len) {
ut_ad(len == fixed_len);
ut_ad(!dfield_is_ext(field));
} else if (dfield_is_ext(field)) {
ut_ad(ifield->col->len >= 256
|| ifield->col->mtype == DATA_BLOB);
ut_ad(len <= REC_MAX_INDEX_COL_LEN
+ BTR_EXTERN_FIELD_REF_SIZE);
*lens-- = (byte) (len >> 8) | 0xc0;
......@@ -1215,11 +1263,20 @@ rec_convert_dtuple_to_rec(
mem_heap_t* heap = NULL;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
const ulint* offsets;
ulint i;
rec_offs_init(offsets_);
offsets = rec_get_offsets(rec, index,
offsets_, ULINT_UNDEFINED, &heap);
ut_ad(rec_validate(rec, offsets));
ut_ad(dtuple_get_n_fields(dtuple)
== rec_offs_n_fields(offsets));
for (i = 0; i < rec_offs_n_fields(offsets); i++) {
ut_ad(!dfield_is_ext(dtuple_get_nth_field(dtuple, i))
== !rec_offs_nth_extern(offsets, i));
}
if (UNIV_LIKELY_NULL(heap)) {
mem_heap_free(heap);
}
......@@ -1402,6 +1459,13 @@ rec_copy_prefix_to_buf(
prefix_len += field->fixed_len;
} else {
ulint len = *lens--;
/* If the maximum length of the column is up
to 255 bytes, the actual length is always
stored in one byte. If the maximum length is
more than 255 bytes, the actual length is
stored in one byte for 0..127. The length
will be encoded in two bytes when it is 128 or
more, or when the column is stored externally. */
if (col->len > 255 || col->mtype == DATA_BLOB) {
if (len & 0x80) {
/* 1exxxxxx */
......
......@@ -341,7 +341,7 @@ row_undo_step(
ut_ad(thr);
srv_activity_count++;
srv_inc_activity_count();
trx = thr_get_trx(thr);
......
/*****************************************************************************
Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/**************************************************//**
@file srv/srv0que.c
Server query execution
Created 6/5/1996 Heikki Tuuri
*******************************************************/
#include "srv0que.h"
#include "srv0srv.h"
#include "sync0sync.h"
#include "os0thread.h"
#include "usr0sess.h"
#include "que0que.h"
/**********************************************************************//**
Enqueues a task to server task queue and releases a worker thread, if there
is a suspended one.
The caller must own kernel_mutex (checked in debug builds). The query
thread is appended to the tail of srv_sys->tasks before
srv_release_threads() is asked to release one SRV_WORKER thread, so the
task is already queued when a worker is woken. */
UNIV_INTERN
void
srv_que_task_enqueue_low(
/*=====================*/
que_thr_t* thr) /*!< in: query thread to enqueue;
must not be NULL (debug assertion) */
{
/* Debug-only preconditions: a valid query thread and the kernel
mutex held by the caller. */
ut_ad(thr);
ut_ad(mutex_own(&kernel_mutex));
/* Append the task at the end of the server task queue. */
UT_LIST_ADD_LAST(queue, srv_sys->tasks, thr);
/* Request the release of at most one suspended worker thread. */
srv_release_threads(SRV_WORKER, 1);
}
......@@ -68,7 +68,6 @@ Created 10/8/1995 Heikki Tuuri
#include "sync0sync.h"
#include "thr0loc.h"
#include "que0que.h"
#include "srv0que.h"
#include "log0recv.h"
#include "pars0pars.h"
#include "usr0sess.h"
......@@ -90,10 +89,6 @@ Created 10/8/1995 Heikki Tuuri
affects only FOREIGN KEY definition parsing */
UNIV_INTERN ibool srv_lower_case_table_names = FALSE;
/* The following counter is incremented whenever there is some user activity
in the server */
UNIV_INTERN ulint srv_activity_count = 0;
/* The following is the maximum allowed duration of a lock wait. */
UNIV_INTERN ulint srv_fatal_semaphore_wait_threshold = 600;
......@@ -324,16 +319,18 @@ concurrency check. */
UNIV_INTERN ulong srv_thread_concurrency = 0;
/* this mutex protects srv_conc data structures */
UNIV_INTERN os_fast_mutex_t srv_conc_mutex;
/* number of transactions that have declared_to_be_inside_innodb set.
It used to be a non-error for this value to drop below zero temporarily.
This is no longer true. We'll, however, keep the lint datatype to add
assertions to catch any corner cases that we may have missed. */
UNIV_INTERN lint srv_conc_n_threads = 0;
/* this mutex protects srv_conc data structures */
static os_fast_mutex_t srv_conc_mutex;
/* number of OS threads waiting in the FIFO for a permission to enter
InnoDB */
UNIV_INTERN ulint srv_conc_n_waiting_threads = 0;
static ulint srv_conc_n_waiting_threads = 0;
typedef struct srv_conc_slot_struct srv_conc_slot_t;
struct srv_conc_slot_struct{
......@@ -351,9 +348,9 @@ struct srv_conc_slot_struct{
};
/* queue of threads waiting to get in */
UNIV_INTERN UT_LIST_BASE_NODE_T(srv_conc_slot_t) srv_conc_queue;
static UT_LIST_BASE_NODE_T(srv_conc_slot_t) srv_conc_queue;
/* array of wait slots */
UNIV_INTERN srv_conc_slot_t* srv_conc_slots;
static srv_conc_slot_t* srv_conc_slots;
/* Number of times a thread is allowed to enter InnoDB within the same
SQL query after it has once got the ticket at srv_conc_enter_innodb */
......@@ -443,6 +440,8 @@ UNIV_INTERN mysql_pfs_key_t srv_monitor_file_mutex_key;
UNIV_INTERN mysql_pfs_key_t srv_dict_tmpfile_mutex_key;
/* Key to register the mutex with performance schema */
UNIV_INTERN mysql_pfs_key_t srv_misc_tmpfile_mutex_key;
/* Key to register srv_sys_t::mutex with performance schema */
UNIV_INTERN mysql_pfs_key_t srv_srv_sys_mutex_key;
#endif /* UNIV_PFS_MUTEX */
/* Temporary file for innodb monitor output */
......@@ -491,6 +490,19 @@ intervals. Following macros define thresholds for these conditions. */
#define SRV_RECENT_IO_ACTIVITY (PCT_IO(5))
#define SRV_PAST_IO_ACTIVITY (PCT_IO(200))
/** Acquire srv_sys->mutex, the mutex embedded in the server system
struct. Wrapped in do { } while (0) so that the macro behaves as a
single statement. */
#define srv_sys_mutex_enter() do { \
mutex_enter(&srv_sys->mutex); \
} while (0)
/** Test if srv_sys->mutex is owned, as reported by mutex_own().
Used in this file inside ut_ad() debug assertions. */
#define srv_sys_mutex_own() mutex_own(&srv_sys->mutex)
/** Release srv_sys->mutex. Wrapped in do { } while (0) so that the
macro behaves as a single statement. */
#define srv_sys_mutex_exit() do { \
mutex_exit(&srv_sys->mutex); \
} while (0)
/*
IMPLEMENTATION OF THE SERVER MAIN PROGRAM
=========================================
......@@ -659,7 +671,7 @@ boosted at least to normal. This priority requirement can be seen similar to
the privileged mode used when processing the kernel calls in traditional
Unix.*/
/* Thread slot in the thread table */
/** Thread slot in the thread table. */
struct srv_slot_struct{
os_thread_id_t id; /*!< thread id */
os_thread_t handle; /*!< thread handle */
......@@ -675,12 +687,43 @@ struct srv_slot_struct{
used for MySQL threads) */
};
/* Table for MySQL threads where they will be suspended to wait for locks */
UNIV_INTERN srv_slot_t* srv_mysql_table = NULL;
/** Thread slot in the thread table */
typedef struct srv_slot_struct srv_slot_t;
/** Thread table is an array of slots */
typedef srv_slot_t srv_table_t;
/** The server system */
typedef struct srv_sys_struct srv_sys_t;
/** The server system struct: the thread tables, the task queue and the
activity counter of the server, together with the mutex that protects
them. */
struct srv_sys_struct{
mutex_t mutex; /*!< mutex protecting the
fields in this structure */
srv_table_t* sys_threads; /*!< server thread table: an
array of thread slots, see
srv_table_get_nth_slot() */
UT_LIST_BASE_NODE_T(que_thr_t)
tasks; /*!< task queue: list of query
threads */
ulint n_threads[SRV_MASTER + 1];
/*!< number of system threads
in each thread class; one
element per thread type up to
and including SRV_MASTER */
ulint n_threads_active[SRV_MASTER + 1];
/*!< number of threads active
in each thread class; sized
like n_threads[] */
srv_slot_t* waiting_threads; /*!< Array of user threads
suspended while waiting for
locks within InnoDB */
srv_slot_t* last_slot; /*!< highest slot ever used
in the waiting_threads array */
ulint activity_count; /*!< For tracking server
activity */
};
UNIV_INTERN os_event_t srv_lock_timeout_thread_event;
UNIV_INTERN srv_sys_t* srv_sys = NULL;
/* padding to prevent other memory update hotspots from residing on
the same memory cache line */
......@@ -691,6 +734,8 @@ UNIV_INTERN mutex_t* kernel_mutex_temp;
the same memory cache line */
UNIV_INTERN byte srv_pad2[64];
static srv_sys_t* srv_sys = NULL;
#if 0
/* The following three values measure the urgency of the jobs of
buffer, version, and insert threads. They may vary from 0 - 1000.
......@@ -705,13 +750,6 @@ static ulint srv_meter_high_water2[SRV_MASTER + 1];
static ulint srv_meter_foreground[SRV_MASTER + 1];
#endif
/* The following values give info about the activity going on in
the database. They are protected by the server mutex. The arrays
are indexed by the type of the thread. */
UNIV_INTERN ulint srv_n_threads_active[SRV_MASTER + 1];
UNIV_INTERN ulint srv_n_threads[SRV_MASTER + 1];
/*********************************************************************//**
Asynchronous purge thread.
@return a dummy parameter */
......@@ -764,14 +802,15 @@ srv_table_get_nth_slot(
/*===================*/
ulint index) /*!< in: index of the slot */
{
ut_ad(srv_sys_mutex_own());
ut_a(index < OS_THREAD_MAX_N);
return(srv_sys->threads + index);
return(srv_sys->sys_threads + index);
}
/*********************************************************************//**
Gets the number of threads in the system.
@return sum of srv_n_threads[] */
@return sum of srv_sys_t::n_threads[] */
UNIV_INTERN
ulint
srv_get_n_threads(void)
......@@ -780,14 +819,14 @@ srv_get_n_threads(void)
ulint i;
ulint n_threads = 0;
mutex_enter(&kernel_mutex);
srv_sys_mutex_enter();
for (i = SRV_COM; i < SRV_MASTER + 1; i++) {
n_threads += srv_n_threads[i];
n_threads += srv_sys->n_threads[i];
}
mutex_exit(&kernel_mutex);
srv_sys_mutex_exit();
return(n_threads);
}
......@@ -806,6 +845,8 @@ srv_table_reserve_slot(
srv_slot_t* slot;
ulint i;
ut_ad(srv_sys_mutex_own());
ut_a(type > 0);
ut_a(type <= SRV_MASTER);
......@@ -846,7 +887,7 @@ srv_suspend_thread(void)
ulint slot_no;
enum srv_thread_type type;
ut_ad(mutex_own(&kernel_mutex));
srv_sys_mutex_enter();
slot_no = thr_local_get_slot_no(os_thread_get_curr_id());
......@@ -867,12 +908,14 @@ srv_suspend_thread(void)
slot->suspended = TRUE;
ut_ad(srv_n_threads_active[type] > 0);
ut_ad(srv_sys->n_threads_active[type] > 0);
srv_n_threads_active[type]--;
srv_sys->n_threads_active[type]--;
os_event_reset(event);
srv_sys_mutex_exit();
return(event);
}
......@@ -881,23 +924,24 @@ Releases threads of the type given from suspension in the thread table.
NOTE! The server mutex has to be reserved by the caller!
@return number of threads released: this may be less than n if not
enough threads were suspended at the moment */
UNIV_INTERN
static
ulint
srv_release_threads(
/*================*/
enum srv_thread_type type, /*!< in: thread type */
ulint n) /*!< in: number of threads to release */
{
srv_slot_t* slot;
ulint i;
ulint count = 0;
ut_ad(type >= SRV_WORKER);
ut_ad(type <= SRV_MASTER);
ut_ad(n > 0);
ut_ad(mutex_own(&kernel_mutex));
srv_sys_mutex_enter();
for (i = 0; i < OS_THREAD_MAX_N; i++) {
srv_slot_t* slot;
slot = srv_table_get_nth_slot(i);
......@@ -905,7 +949,7 @@ srv_release_threads(
slot->suspended = FALSE;
srv_n_threads_active[type]++;
srv_sys->n_threads_active[type]++;
os_event_set(slot->event);
......@@ -925,6 +969,8 @@ srv_release_threads(
}
}
srv_sys_mutex_exit();
return(count);
}
......@@ -940,7 +986,7 @@ srv_get_thread_type(void)
srv_slot_t* slot;
enum srv_thread_type type;
mutex_enter(&kernel_mutex);
srv_sys_mutex_enter();
slot_no = thr_local_get_slot_no(os_thread_get_curr_id());
......@@ -951,7 +997,7 @@ srv_get_thread_type(void)
ut_ad(type >= SRV_WORKER);
ut_ad(type <= SRV_MASTER);
mutex_exit(&kernel_mutex);
srv_sys_mutex_exit();
return(type);
}
......@@ -963,11 +1009,14 @@ void
srv_init(void)
/*==========*/
{
srv_conc_slot_t* conc_slot;
srv_slot_t* slot;
ulint i;
srv_conc_slot_t* conc_slot;
ulint srv_sys_sz;
srv_sys = mem_alloc(sizeof(srv_sys_t));
srv_sys_sz = sizeof(*srv_sys)
+ (OS_THREAD_MAX_N * sizeof(srv_slot_t) * 2);
srv_sys = mem_zalloc(srv_sys_sz);
kernel_mutex_temp = mem_alloc(sizeof(mutex_t));
mutex_create(kernel_mutex_key, &kernel_mutex, SYNC_KERNEL);
......@@ -975,42 +1024,30 @@ srv_init(void)
mutex_create(srv_innodb_monitor_mutex_key,
&srv_innodb_monitor_mutex, SYNC_NO_ORDER_CHECK);
srv_sys->threads = mem_alloc(OS_THREAD_MAX_N * sizeof(srv_slot_t));
mutex_create(srv_srv_sys_mutex_key, &srv_sys->mutex, SYNC_THREADS);
for (i = 0; i < OS_THREAD_MAX_N; i++) {
slot = srv_table_get_nth_slot(i);
slot->in_use = FALSE;
slot->type=0; /* Avoid purify errors */
slot->event = os_event_create(NULL);
ut_a(slot->event);
}
srv_sys_mutex_enter();
srv_mysql_table = mem_alloc(OS_THREAD_MAX_N * sizeof(srv_slot_t));
srv_sys->sys_threads = (srv_slot_t*) &srv_sys[1];
srv_sys->waiting_threads = srv_sys->sys_threads + OS_THREAD_MAX_N;
srv_sys->last_slot = srv_sys->waiting_threads;
for (i = 0; i < OS_THREAD_MAX_N; i++) {
slot = srv_mysql_table + i;
slot->in_use = FALSE;
slot->type = 0;
srv_slot_t* slot;
slot = srv_table_get_nth_slot(i);
slot->event = os_event_create(NULL);
ut_a(slot->event);
}
srv_lock_timeout_thread_event = os_event_create(NULL);
for (i = 0; i < SRV_MASTER + 1; i++) {
srv_n_threads_active[i] = 0;
srv_n_threads[i] = 0;
#if 0
srv_meter[i] = 30;
srv_meter_low_water[i] = 50;
srv_meter_high_water[i] = 100;
srv_meter_high_water2[i] = 200;
srv_meter_foreground[i] = 250;
#endif
}
UT_LIST_INIT(srv_sys->tasks);
srv_sys_mutex_exit();
/* Create dummy indexes for infimum and supremum records */
dict_ind_init();
......@@ -1045,14 +1082,11 @@ srv_free(void)
mem_free(srv_conc_slots);
srv_conc_slots = NULL;
mem_free(srv_sys->threads);
mem_free(srv_sys);
srv_sys = NULL;
mem_free(kernel_mutex_temp);
kernel_mutex_temp = NULL;
mem_free(srv_mysql_table);
srv_mysql_table = NULL;
trx_i_s_cache_free(trx_i_s_cache);
}
......@@ -1408,67 +1442,150 @@ srv_boot(void)
return(DB_SUCCESS);
}
/*********************************************************************//**
Print the contents of the srv_sys_t::waiting_threads array. */
static
void
srv_print_mysql_threads(void)
/*=========================*/
{
ulint i;
for (i = 0; i < OS_THREAD_MAX_N; i++) {
srv_slot_t* slot;
slot = srv_sys->waiting_threads + i;
fprintf(stderr,
"Slot %lu: thread id %lu, type %lu,"
" in use %lu, susp %lu, time %lu\n",
(ulong) i,
(ulong) os_thread_pf(slot->id),
(ulong) slot->type,
(ulong) slot->in_use,
(ulong) slot->suspended,
(ulong) difftime(ut_time(), slot->suspend_time));
}
}
/*********************************************************************//**
Release a slot in the srv_sys_t::waiting_threads array and pull
srv_sys_t::last_slot back so that it points just past the highest slot
that is still in use (or at the start of the array when no slot is in
use). Acquires and releases srv_sys_t::mutex itself. */
static
void
srv_table_release_slot_for_mysql(
/*=============================*/
	srv_slot_t*	slot)	/*!< in: slot to release */
{
	srv_slot_t*	last;
#ifdef UNIV_DEBUG
	srv_slot_t*	upper = srv_sys->waiting_threads + OS_THREAD_MAX_N;
#endif /* UNIV_DEBUG */

	srv_sys_mutex_enter();

	ut_a(slot->in_use);
	ut_a(slot->thr != NULL);
	ut_a(slot->thr->slot != NULL);
	ut_a(slot->thr->slot == slot);

	/* Must be within the array boundaries. */
	ut_ad(slot >= srv_sys->waiting_threads);
	ut_ad(slot < upper);

	/* Break the link between the query thread and the slot. */
	slot->thr->slot = NULL;
	slot->thr = NULL;
	slot->in_use = FALSE;

	/* Scan backwards and adjust the last free slot pointer.
	last_slot may be the one-past-the-end address of the array (when
	every slot has been reserved), so it must never be dereferenced
	itself: always inspect the slot just below the candidate. */
	for (last = srv_sys->last_slot;
	     last > srv_sys->waiting_threads && !(last - 1)->in_use;
	     --last) {
		/* No op */
	}

	/* Either the array is empty or the slot below is in use. */
	ut_ad(last == srv_sys->waiting_threads || (last - 1)->in_use);

	srv_sys->last_slot = last;

	/* The last slot is either outside of the array boundary or it's
	on an empty slot. */
	ut_ad(srv_sys->last_slot == upper || !srv_sys->last_slot->in_use);

	ut_ad(srv_sys->last_slot >= srv_sys->waiting_threads);
	ut_ad(srv_sys->last_slot <= upper);

	srv_sys_mutex_exit();
}
/*********************************************************************//**
Reserves a slot in the thread table for the current MySQL OS thread.
NOTE! The kernel mutex has to be reserved by the caller!
@return reserved slot */
static
srv_slot_t*
srv_table_reserve_slot_for_mysql(void)
/*==================================*/
srv_table_reserve_slot_for_mysql(
/*=============================*/
que_thr_t* thr) /*!< in: query thread associated
with the MySQL OS thread */
{
srv_slot_t* slot;
ulint i;
srv_slot_t* slot;
ut_ad(mutex_own(&kernel_mutex));
srv_sys_mutex_enter();
i = 0;
slot = srv_mysql_table + i;
slot = srv_sys->waiting_threads;
while (slot->in_use) {
i++;
for (i = 0; i < OS_THREAD_MAX_N; ++i, ++slot) {
if (!slot->in_use) {
break;
}
}
if (i >= OS_THREAD_MAX_N) {
/* Check if we have run out of slots. */
if (slot == srv_sys->waiting_threads+ OS_THREAD_MAX_N) {
ut_print_timestamp(stderr);
fprintf(stderr,
" InnoDB: There appear to be %lu MySQL"
" threads currently waiting\n"
"InnoDB: inside InnoDB, which is the"
" upper limit. Cannot continue operation.\n"
"InnoDB: We intentionally generate"
" a seg fault to print a stack trace\n"
"InnoDB: on Linux. But first we print"
" a list of waiting threads.\n", (ulong) i);
ut_print_timestamp(stderr);
for (i = 0; i < OS_THREAD_MAX_N; i++) {
fprintf(stderr,
" InnoDB: There appear to be %lu MySQL"
" threads currently waiting\n"
"InnoDB: inside InnoDB, which is the"
" upper limit. Cannot continue operation.\n"
"InnoDB: We intentionally generate"
" a seg fault to print a stack trace\n"
"InnoDB: on Linux. But first we print"
" a list of waiting threads.\n", (ulong) i);
srv_print_mysql_threads();
ut_error;
} else {
slot = srv_mysql_table + i;
ut_a(slot->in_use == FALSE);
fprintf(stderr,
"Slot %lu: thread id %lu, type %lu,"
" in use %lu, susp %lu, time %lu\n",
(ulong) i,
(ulong) os_thread_pf(slot->id),
(ulong) slot->type,
(ulong) slot->in_use,
(ulong) slot->suspended,
(ulong) difftime(ut_time(),
slot->suspend_time));
}
slot->in_use = TRUE;
slot->thr = thr;
slot->thr->slot = slot;
slot->id = os_thread_get_curr_id();
slot->handle = os_thread_get_curr();
ut_error;
if (slot->event == NULL) {
slot->event = os_event_create(NULL);
ut_a(slot->event);
}
slot = srv_mysql_table + i;
os_event_reset(slot->event);
slot->suspended = TRUE;
slot->suspend_time = ut_time();
}
ut_a(slot->in_use == FALSE);
if (slot == srv_sys->last_slot) {
++srv_sys->last_slot;
}
slot->in_use = TRUE;
slot->id = os_thread_get_curr_id();
slot->handle = os_thread_get_curr();
ut_ad(srv_sys->last_slot <= srv_sys->waiting_threads+ OS_THREAD_MAX_N);
srv_sys_mutex_exit();
return(slot);
}
......@@ -1487,7 +1604,6 @@ srv_suspend_mysql_thread(
OS thread */
{
srv_slot_t* slot;
os_event_t event;
double wait_time;
trx_t* trx;
ulint had_dict_lock;
......@@ -1529,15 +1645,7 @@ srv_suspend_mysql_thread(
ut_ad(thr->is_active == FALSE);
slot = srv_table_reserve_slot_for_mysql();
event = slot->event;
slot->thr = thr;
os_event_reset(event);
slot->suspend_time = ut_time();
slot = srv_table_reserve_slot_for_mysql(thr);
if (thr->lock_state == QUE_THR_LOCK_ROW) {
srv_n_lock_wait_count++;
......@@ -1583,7 +1691,7 @@ srv_suspend_mysql_thread(
/* Suspend this thread and wait for the event. */
os_event_wait(event);
os_event_wait(slot->event);
/* After resuming, reacquire the data dictionary latch if
necessary. */
......@@ -1604,13 +1712,13 @@ srv_suspend_mysql_thread(
srv_conc_force_enter_innodb(trx);
}
wait_time = ut_difftime(ut_time(), slot->suspend_time);
mutex_enter(&kernel_mutex);
/* Release the slot for others to use */
slot->in_use = FALSE;
wait_time = ut_difftime(ut_time(), slot->suspend_time);
srv_table_release_slot_for_mysql(slot);
if (thr->lock_state == QUE_THR_LOCK_ROW) {
if (ut_usectime(&sec, &ms) == -1) {
......@@ -1663,25 +1771,13 @@ srv_release_mysql_thread_if_suspended(
que_thr_t* thr) /*!< in: query thread associated with the
MySQL OS thread */
{
srv_slot_t* slot;
ulint i;
ut_ad(mutex_own(&kernel_mutex));
for (i = 0; i < OS_THREAD_MAX_N; i++) {
slot = srv_mysql_table + i;
if (slot->in_use && slot->thr == thr) {
/* Found */
if (thr->slot != NULL) {
ut_a(thr->slot->in_use);
os_event_set(slot->event);
return;
}
os_event_set(thr->slot->event);
}
/* not found */
}
/******************************************************************//**
......@@ -2156,6 +2252,83 @@ srv_monitor_thread(
OS_THREAD_DUMMY_RETURN;
}
/*********************************************************************//**
Check if the thread lock wait has timed out. Release its locks if the
wait has actually timed out.
The caller must hold srv_sys_t::mutex. The mutex may be released
temporarily inside this function (so that kernel_mutex can be acquired
first) but it is always held again on return. */
UNIV_INTERN
void
srv_lock_check_wait(
/*================*/
	srv_slot_t*	slot)	/*!< in: slot of a waiting thread; the
				caller has checked slot->in_use */
{
	trx_t*		trx;
	double		wait_time;
	ulong		lock_wait_timeout;
	ib_time_t	suspend_time = slot->suspend_time;

	ut_ad(srv_sys_mutex_own());

	wait_time = ut_difftime(ut_time(), suspend_time);

	trx = thr_get_trx(slot->thr);

	lock_wait_timeout = thd_lock_wait_timeout(trx->mysql_thd);

	if (trx_is_interrupted(trx)
	    || (lock_wait_timeout < 100000000
		&& (wait_time > (double) lock_wait_timeout
		    || wait_time < 0))) {

		/* Timeout exceeded or a wrap-around in system
		time counter: cancel the lock request queued
		by the transaction and release possible
		other transactions waiting behind; it is
		possible that the lock has already been
		granted: in that case do nothing */

		if (trx->wait_lock) {

			/* Release the srv_sys_t->mutex to preserve the
			latch order only. */
			srv_sys_mutex_exit();

			/* It is possible that the thread has already
			freed its slot and released its locks and another
			thread is now using this slot. We need to
			check whether the slot is still in use by the
			same thread before cancelling the wait and releasing
			the locks. */

			mutex_enter(&kernel_mutex);

			srv_sys_mutex_enter();

			/* We can't compare the pointers here because the
			memory can be recycled. Transaction ids are not
			recycled and therefore safe to use. We also check if
			the transaction suspend time is the same that we
			used for calculating the wait earlier. If the
			transaction has already released its locks there
			is nothing more we can do.

			A released slot has slot->thr == NULL, so the
			slot's transaction is looked up only after
			slot->in_use has been verified (the && chain
			short-circuits). */

			if (slot->in_use
			    && suspend_time == slot->suspend_time
			    && ut_dulint_cmp(
				    trx->id,
				    thr_get_trx(slot->thr)->id) == 0
			    && trx->wait_lock != NULL) {

				ut_a(trx->que_state == TRX_QUE_LOCK_WAIT);

				lock_cancel_waiting_and_release(
					trx->wait_lock);
			}

			mutex_exit(&kernel_mutex);
		}
	}
}
/*********************************************************************//**
A thread which wakes up threads whose lock wait may have lasted too long.
@return a dummy parameter */
......@@ -2169,8 +2342,6 @@ srv_lock_timeout_thread(
{
srv_slot_t* slot;
ibool some_waits;
double wait_time;
ulint i;
#ifdef UNIV_PFS_THREAD
pfs_register_thread(srv_lock_timeout_thread_key);
......@@ -2184,52 +2355,26 @@ srv_lock_timeout_thread(
srv_lock_timeout_active = TRUE;
mutex_enter(&kernel_mutex);
srv_sys_mutex_enter();
some_waits = FALSE;
/* Check of all slots if a thread is waiting there, and if it
has exceeded the time limit */
for (i = 0; i < OS_THREAD_MAX_N; i++) {
/* Check all slots for user threads that are waiting on locks, and
if they have exceeded the time limit. */
slot = srv_mysql_table + i;
for (slot = srv_sys->waiting_threads;
slot < srv_sys->last_slot;
++slot) {
if (slot->in_use) {
trx_t* trx;
ulong lock_wait_timeout;
some_waits = TRUE;
wait_time = ut_difftime(ut_time(), slot->suspend_time);
trx = thr_get_trx(slot->thr);
lock_wait_timeout = thd_lock_wait_timeout(
trx->mysql_thd);
if (trx_is_interrupted(trx)
|| (lock_wait_timeout < 100000000
&& (wait_time > (double) lock_wait_timeout
|| wait_time < 0))) {
/* Timeout exceeded or a wrap-around in system
time counter: cancel the lock request queued
by the transaction and release possible
other transactions waiting behind; it is
possible that the lock has already been
granted: in that case do nothing */
if (trx->wait_lock) {
lock_cancel_waiting_and_release(
trx->wait_lock);
}
}
srv_lock_check_wait(slot);
}
}
os_event_reset(srv_lock_timeout_thread_event);
mutex_exit(&kernel_mutex);
srv_sys_mutex_exit();
if (srv_shutdown_state >= SRV_SHUTDOWN_CLEANUP) {
goto exit_func;
......@@ -2367,26 +2512,68 @@ srv_error_monitor_thread(
OS_THREAD_DUMMY_RETURN;
}
/******************************************************************//**
Bump srv_sys_t::activity_count by one while holding srv_sys_t::mutex. */
UNIV_INLINE
void
srv_inc_activity_count_low(void)
/*============================*/
{
	srv_sys_mutex_enter();

	srv_sys->activity_count += 1;

	srv_sys_mutex_exit();
}
/******************************************************************//**
Increment the server activity count. Non-inline entry point that simply
forwards to srv_inc_activity_count_low(). */
UNIV_INTERN
void
srv_inc_activity_count(void)
/*========================*/
{
	srv_inc_activity_count_low();
}
/**********************************************************************//**
Check whether the master thread is active. The active-thread counter for
SRV_MASTER is read under srv_sys_t::mutex.
@return FALSE if it is not active */
UNIV_INTERN
ibool
srv_is_master_thread_active(void)
/*=============================*/
{
	ulint	n_active;

	srv_sys_mutex_enter();

	n_active = srv_sys->n_threads_active[SRV_MASTER];

	srv_sys_mutex_exit();

	return(n_active != 0);
}
/*******************************************************************//**
Tells the InnoDB server that there has been activity in the database
and wakes up the master thread if it is suspended (not sleeping). Used
in the MySQL interface. Note that there is a small chance that the master
thread stays suspended (we do not protect our operation with the kernel
mutex, for performace reasons). */
thread stays suspended (we do not protect our operation with the
srv_sys_t->mutex, for performance reasons). */
UNIV_INTERN
void
srv_active_wake_master_thread(void)
/*===============================*/
{
srv_activity_count++;
ut_ad(!mutex_own(&kernel_mutex));
ut_ad(!srv_sys_mutex_own());
if (srv_n_threads_active[SRV_MASTER] == 0) {
srv_inc_activity_count_low();
mutex_enter(&kernel_mutex);
if (srv_sys->n_threads_active[SRV_MASTER] == 0) {
srv_release_threads(SRV_MASTER, 1);
mutex_exit(&kernel_mutex);
}
}
......@@ -2394,23 +2581,20 @@ srv_active_wake_master_thread(void)
Tells the purge thread that there has been activity in the database
and wakes up the purge thread if it is suspended (not sleeping). Note
that there is a small chance that the purge thread stays suspended
(we do not protect our operation with the kernel mutex, for
performace reasons). */
(we do not protect our operation with the srv_sys_t::mutex, for
performance reasons). */
UNIV_INTERN
void
srv_wake_purge_thread_if_not_active(void)
/*=====================================*/
{
ut_ad(!mutex_own(&kernel_mutex));
ut_ad(!srv_sys_mutex_own());
if (srv_n_purge_threads > 0
&& srv_n_threads_active[SRV_WORKER] == 0) {
mutex_enter(&kernel_mutex);
&& srv_sys->n_threads_active[SRV_WORKER] == 0) {
srv_release_threads(SRV_WORKER, 1);
mutex_exit(&kernel_mutex);
}
}
......@@ -2421,13 +2605,12 @@ void
srv_wake_master_thread(void)
/*========================*/
{
srv_activity_count++;
ut_ad(!mutex_own(&kernel_mutex));
ut_ad(!srv_sys_mutex_own());
mutex_enter(&kernel_mutex);
srv_inc_activity_count_low();
srv_release_threads(SRV_MASTER, 1);
mutex_exit(&kernel_mutex);
}
/*******************************************************************//**
......@@ -2438,17 +2621,34 @@ srv_wake_purge_thread(void)
/*=======================*/
{
ut_ad(!mutex_own(&kernel_mutex));
ut_ad(!srv_sys_mutex_own());
if (srv_n_purge_threads > 0) {
mutex_enter(&kernel_mutex);
srv_release_threads(SRV_WORKER, 1);
mutex_exit(&kernel_mutex);
}
}
/*******************************************************************//**
Check if there has been any activity since the given snapshot of the
activity counter was taken. The counter is read under srv_sys_t::mutex.
@return FALSE if no change in activity counter */
UNIV_INLINE
ibool
srv_check_activity(
/*===============*/
	ulint	old_activity_count)	/*!< old activity count */
{
	ulint	current_count;

	srv_sys_mutex_enter();

	current_count = srv_sys->activity_count;

	srv_sys_mutex_exit();

	return(current_count != old_activity_count);
}
/**********************************************************************
The master thread is tasked to ensure that flush of log file happens
once every second in the background. This is to ensure that not more
......@@ -2535,13 +2735,13 @@ srv_master_thread(
srv_main_thread_process_no = os_proc_get_number();
srv_main_thread_id = os_thread_pf(os_thread_get_curr_id());
srv_table_reserve_slot(SRV_MASTER);
srv_sys_mutex_enter();
mutex_enter(&kernel_mutex);
srv_table_reserve_slot(SRV_MASTER);
srv_n_threads_active[SRV_MASTER]++;
srv_sys->n_threads_active[SRV_MASTER]++;
mutex_exit(&kernel_mutex);
srv_sys_mutex_exit();
loop:
/*****************************************************************/
......@@ -2553,12 +2753,13 @@ srv_master_thread(
buf_get_total_stat(&buf_stat);
n_ios_very_old = log_sys->n_log_ios + buf_stat.n_pages_read
+ buf_stat.n_pages_written;
mutex_enter(&kernel_mutex);
srv_sys_mutex_enter();
/* Store the user activity counter at the start of this loop */
old_activity_count = srv_activity_count;
old_activity_count = srv_sys->activity_count;
mutex_exit(&kernel_mutex);
srv_sys_mutex_exit();
if (srv_force_recovery >= SRV_FORCE_NO_BACKGROUND) {
......@@ -2664,7 +2865,7 @@ srv_master_thread(
}
}
if (srv_activity_count == old_activity_count) {
if (srv_sys->activity_count == old_activity_count) {
/* There is no user activity at the moment, go to
the background loop */
......@@ -2755,18 +2956,13 @@ srv_master_thread(
srv_main_thread_op_info = "reserving kernel mutex";
mutex_enter(&kernel_mutex);
/* ---- When there is database activity, we jump from here back to
the start of loop */
if (srv_activity_count != old_activity_count) {
mutex_exit(&kernel_mutex);
if (srv_check_activity(old_activity_count)) {
goto loop;
}
mutex_exit(&kernel_mutex);
/* If the database is quiet, we enter the background loop */
/*****************************************************************/
......@@ -2799,12 +2995,9 @@ srv_master_thread(
srv_main_thread_op_info = "reserving kernel mutex";
mutex_enter(&kernel_mutex);
if (srv_activity_count != old_activity_count) {
mutex_exit(&kernel_mutex);
if (srv_check_activity(old_activity_count)) {
goto loop;
}
mutex_exit(&kernel_mutex);
srv_main_thread_op_info = "doing insert buffer merge";
......@@ -2821,12 +3014,9 @@ srv_master_thread(
srv_main_thread_op_info = "reserving kernel mutex";
mutex_enter(&kernel_mutex);
if (srv_activity_count != old_activity_count) {
mutex_exit(&kernel_mutex);
if (srv_check_activity(old_activity_count)) {
goto loop;
}
mutex_exit(&kernel_mutex);
flush_loop:
srv_main_thread_op_info = "flushing buffer pool pages";
......@@ -2843,12 +3033,9 @@ srv_master_thread(
srv_main_thread_op_info = "reserving kernel mutex";
mutex_enter(&kernel_mutex);
if (srv_activity_count != old_activity_count) {
mutex_exit(&kernel_mutex);
if (srv_check_activity(old_activity_count)) {
goto loop;
}
mutex_exit(&kernel_mutex);
srv_main_thread_op_info = "waiting for buffer pool flush to end";
buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
......@@ -2870,12 +3057,10 @@ srv_master_thread(
srv_main_thread_op_info = "reserving kernel mutex";
mutex_enter(&kernel_mutex);
if (srv_activity_count != old_activity_count) {
mutex_exit(&kernel_mutex);
if (srv_check_activity(old_activity_count)) {
goto loop;
}
mutex_exit(&kernel_mutex);
/*
srv_main_thread_op_info = "archiving log (if log archive is on)";
......@@ -2921,10 +3106,10 @@ srv_master_thread(
goto loop;
}
event = srv_suspend_thread();
mutex_exit(&kernel_mutex);
event = srv_suspend_thread();
/* DO NOT CHANGE THIS STRING. innobase_start_or_create_for_mysql()
waits for database activity to die down when converting < 4.1.x
databases, and relies on this string being exactly as it is. InnoDB
......@@ -2974,13 +3159,13 @@ srv_purge_thread(
os_thread_pf(os_thread_get_curr_id()));
#endif /* UNIV_DEBUG_THREAD_CREATION */
mutex_enter(&kernel_mutex);
srv_sys_mutex_enter();
slot_no = srv_table_reserve_slot(SRV_WORKER);
++srv_n_threads_active[SRV_WORKER];
++srv_sys->n_threads_active[SRV_WORKER];
mutex_exit(&kernel_mutex);
srv_sys_mutex_exit();
while (srv_shutdown_state != SRV_SHUTDOWN_EXIT_THREADS) {
......@@ -2996,12 +3181,8 @@ srv_purge_thread(
os_event_t event;
mutex_enter(&kernel_mutex);
event = srv_suspend_thread();
mutex_exit(&kernel_mutex);
os_event_wait(event);
}
......@@ -3030,13 +3211,13 @@ srv_purge_thread(
/* Free the thread local memory. */
thr_local_free(os_thread_get_curr_id());
mutex_enter(&kernel_mutex);
srv_sys_mutex_enter();
/* Free the slot for reuse. */
slot = srv_table_get_nth_slot(slot_no);
slot->in_use = FALSE;
mutex_exit(&kernel_mutex);
srv_sys_mutex_exit();
#ifdef UNIV_DEBUG_THREAD_CREATION
fprintf(stderr, "InnoDB: Purge thread exiting, id %lu\n",
......@@ -3049,3 +3230,24 @@ srv_purge_thread(
OS_THREAD_DUMMY_RETURN; /* Not reached, avoid compiler warning */
}
/**********************************************************************//**
Enqueues a task to server task queue and releases a worker thread, if there
is a suspended one. The task is appended to the tail of srv_sys_t::tasks
while holding srv_sys_t::mutex. */
UNIV_INTERN
void
srv_que_task_enqueue_low(
/*=====================*/
que_thr_t* thr) /*!< in: query thread; must not be NULL */
{
ut_ad(thr);
srv_sys_mutex_enter();
UT_LIST_ADD_LAST(queue, srv_sys->tasks, thr);
srv_sys_mutex_exit();
/* The mutex must already be released here: srv_release_threads()
acquires srv_sys_t::mutex itself. */
srv_release_threads(SRV_WORKER, 1);
}
......@@ -1167,6 +1167,7 @@ sync_thread_add_level(
case SYNC_SEARCH_SYS_CONF:
case SYNC_TRX_LOCK_HEAP:
case SYNC_KERNEL:
case SYNC_THREADS:
case SYNC_IBUF_BITMAP_MUTEX:
case SYNC_RSEG:
case SYNC_TRX_UNDO:
......
......@@ -350,8 +350,13 @@ trx_undo_rec_get_col_val(
ut_ad(*orig_len >= BTR_EXTERN_FIELD_REF_SIZE);
ut_ad(*len > *orig_len);
ut_ad(*len >= REC_MAX_INDEX_COL_LEN
/* @see dtuple_convert_big_rec() */
ut_ad(*len >= BTR_EXTERN_FIELD_REF_SIZE * 2);
/* we do not have access to index->table here
ut_ad(dict_table_get_format(index->table) >= DICT_TF_FORMAT_ZIP
|| *len >= REC_MAX_INDEX_COL_LEN
+ BTR_EXTERN_FIELD_REF_SIZE);
*/
*len += UNIV_EXTERN_STORAGE_FIELD;
break;
......@@ -1075,11 +1080,15 @@ trx_undo_rec_get_partial_row(
/* If the prefix of this column is indexed,
ensure that enough prefix is stored in the
undo log record. */
ut_a(ignore_prefix
|| !col->ord_part
|| dfield_get_len(dfield)
>= REC_MAX_INDEX_COL_LEN
+ BTR_EXTERN_FIELD_REF_SIZE);
if (!ignore_prefix && col->ord_part) {
ut_a(dfield_get_len(dfield)
>= 2 * BTR_EXTERN_FIELD_REF_SIZE);
ut_a(dict_table_get_format(index->table)
>= DICT_TF_FORMAT_ZIP
|| dfield_get_len(dfield)
>= REC_MAX_INDEX_COL_LEN
+ BTR_EXTERN_FIELD_REF_SIZE);
}
}
}
......
......@@ -37,7 +37,6 @@ Created 3/26/1996 Heikki Tuuri
#include "trx0rec.h"
#include "que0que.h"
#include "usr0sess.h"
#include "srv0que.h"
#include "srv0start.h"
#include "row0undo.h"
#include "row0mysql.h"
......
......@@ -847,7 +847,7 @@ trx_commit_off_kernel(
recovery i.e.: back ground rollback thread is still active
then there is a chance that the rollback thread may see
this trx as COMMITTED_IN_MEMORY and goes ahead to clean it
up calling trx_cleanup_at_db_startup(). This can happen
up calling trx_cleanup_at_db_startup(). This can happen
in the case we are committing a trx here that is left in
PREPARED state during the crash. Note that commit of the
rollback of a PREPARED trx happens in the recovery thread
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment