Commit bcfabf43 authored by Vasil Dimov

Merge mysql-5.1-innodb -> mysql-5.1

parents 173551bb 4ffb26de
...@@ -93,10 +93,13 @@ sync_arr_wake_threads_if_sema_free(void); ...@@ -93,10 +93,13 @@ sync_arr_wake_threads_if_sema_free(void);
Prints warnings of long semaphore waits to stderr. */ Prints warnings of long semaphore waits to stderr. */
ibool ibool
sync_array_print_long_waits(void); sync_array_print_long_waits(
/*=============================*/ /*========================*/
/* out: TRUE if fatal semaphore wait threshold /* out: TRUE if fatal semaphore wait threshold
was exceeded */ was exceeded */
os_thread_id_t* waiter, /* out: longest waiting thread */
const void** sema) /* out: longest-waited-for semaphore */
__attribute__((nonnull));
/************************************************************************ /************************************************************************
Validates the integrity of the wait array. Checks Validates the integrity of the wait array. Checks
that the number of reserved cells equals the count variable. */ that the number of reserved cells equals the count variable. */
......
...@@ -2180,9 +2180,15 @@ srv_error_monitor_thread( ...@@ -2180,9 +2180,15 @@ srv_error_monitor_thread(
os_thread_create */ os_thread_create */
{ {
/* number of successive fatal timeouts observed */ /* number of successive fatal timeouts observed */
ulint fatal_cnt = 0; ulint fatal_cnt = 0;
dulint old_lsn; dulint old_lsn;
dulint new_lsn; dulint new_lsn;
/* longest waiting thread for a semaphore */
os_thread_id_t waiter = os_thread_get_curr_id();
os_thread_id_t old_waiter = waiter;
/* the semaphore that is being waited for */
const void* sema = NULL;
const void* old_sema = NULL;
old_lsn = srv_start_lsn; old_lsn = srv_start_lsn;
...@@ -2224,10 +2230,11 @@ srv_error_monitor_thread( ...@@ -2224,10 +2230,11 @@ srv_error_monitor_thread(
/* In case mutex_exit is not a memory barrier, it is /* In case mutex_exit is not a memory barrier, it is
theoretically possible some threads are left waiting though theoretically possible some threads are left waiting though
the semaphore is already released. Wake up those threads: */ the semaphore is already released. Wake up those threads: */
sync_arr_wake_threads_if_sema_free(); sync_arr_wake_threads_if_sema_free();
if (sync_array_print_long_waits()) { if (sync_array_print_long_waits(&waiter, &sema)
&& sema == old_sema && os_thread_eq(waiter, old_waiter)) {
fatal_cnt++; fatal_cnt++;
if (fatal_cnt > 10) { if (fatal_cnt > 10) {
...@@ -2242,6 +2249,8 @@ srv_error_monitor_thread( ...@@ -2242,6 +2249,8 @@ srv_error_monitor_thread(
} }
} else { } else {
fatal_cnt = 0; fatal_cnt = 0;
old_waiter = waiter;
old_sema = sema;
} }
/* Flush stderr so that a database user gets the output /* Flush stderr so that a database user gets the output
......
...@@ -916,10 +916,12 @@ sync_arr_wake_threads_if_sema_free(void) ...@@ -916,10 +916,12 @@ sync_arr_wake_threads_if_sema_free(void)
Prints warnings of long semaphore waits to stderr. */ Prints warnings of long semaphore waits to stderr. */
ibool ibool
sync_array_print_long_waits(void) sync_array_print_long_waits(
/*=============================*/ /*========================*/
/* out: TRUE if fatal semaphore wait threshold /* out: TRUE if fatal semaphore wait threshold
was exceeded */ was exceeded */
os_thread_id_t* waiter, /* out: longest waiting thread */
const void** sema) /* out: longest-waited-for semaphore */
{ {
sync_cell_t* cell; sync_cell_t* cell;
ibool old_val; ibool old_val;
...@@ -927,24 +929,40 @@ sync_array_print_long_waits(void) ...@@ -927,24 +929,40 @@ sync_array_print_long_waits(void)
ulint i; ulint i;
ulint fatal_timeout = srv_fatal_semaphore_wait_threshold; ulint fatal_timeout = srv_fatal_semaphore_wait_threshold;
ibool fatal = FALSE; ibool fatal = FALSE;
double longest_diff = 0;
for (i = 0; i < sync_primary_wait_array->n_cells; i++) { for (i = 0; i < sync_primary_wait_array->n_cells; i++) {
double diff;
void* wait_object;
cell = sync_array_get_nth_cell(sync_primary_wait_array, i); cell = sync_array_get_nth_cell(sync_primary_wait_array, i);
if (cell->wait_object != NULL && cell->waiting wait_object = cell->wait_object;
&& difftime(time(NULL), cell->reservation_time) > 240) {
if (wait_object == NULL || !cell->waiting) {
continue;
}
diff = difftime(time(NULL), cell->reservation_time);
if (diff > 240) {
fputs("InnoDB: Warning: a long semaphore wait:\n", fputs("InnoDB: Warning: a long semaphore wait:\n",
stderr); stderr);
sync_array_cell_print(stderr, cell); sync_array_cell_print(stderr, cell);
noticed = TRUE; noticed = TRUE;
} }
if (cell->wait_object != NULL && cell->waiting if (diff > fatal_timeout) {
&& difftime(time(NULL), cell->reservation_time)
> fatal_timeout) {
fatal = TRUE; fatal = TRUE;
} }
if (diff > longest_diff) {
longest_diff = diff;
*sema = wait_object;
*waiter = cell->thread;
}
} }
if (noticed) { if (noticed) {
......
2011-03-30 The InnoDB Team
* srv/srv0srv.c, sync/sync0arr.h, sync/sync0arr.c:
Fix Bug#11877216 InnoDB too eager to commit suicide on a busy server
2011-03-15 The InnoDB Team
* btr/btr0cur.c, page/page0zip.c:
Fix Bug#11849231 inflateInit() invoked without initializing all memory
2011-02-28 The InnoDB Team
* btr/btr0sea.c, buf/buf0buf.c, buf/buf0lru.c:
Fix Bug#58549 Race condition in buf_LRU_drop_page_hash_for_tablespace()
and compressed tables
2011-02-15 The InnoDB Team 2011-02-15 The InnoDB Team
* sync/sync0rw.c, innodb_bug59307.test: * sync/sync0rw.c, innodb_bug59307.test:
......
...@@ -4627,27 +4627,45 @@ btr_copy_blob_prefix( ...@@ -4627,27 +4627,45 @@ btr_copy_blob_prefix(
/*******************************************************************//** /*******************************************************************//**
Copies the prefix of a compressed BLOB. The clustered index record Copies the prefix of a compressed BLOB. The clustered index record
that points to this BLOB must be protected by a lock or a page latch. */ that points to this BLOB must be protected by a lock or a page latch.
@return number of bytes written to buf */
static static
void ulint
btr_copy_zblob_prefix( btr_copy_zblob_prefix(
/*==================*/ /*==================*/
z_stream* d_stream,/*!< in/out: the decompressing stream */ byte* buf, /*!< out: the externally stored part of
the field, or a prefix of it */
ulint len, /*!< in: length of buf, in bytes */
ulint zip_size,/*!< in: compressed BLOB page size */ ulint zip_size,/*!< in: compressed BLOB page size */
ulint space_id,/*!< in: space id of the BLOB pages */ ulint space_id,/*!< in: space id of the BLOB pages */
ulint page_no,/*!< in: page number of the first BLOB page */ ulint page_no,/*!< in: page number of the first BLOB page */
ulint offset) /*!< in: offset on the first BLOB page */ ulint offset) /*!< in: offset on the first BLOB page */
{ {
ulint page_type = FIL_PAGE_TYPE_ZBLOB; ulint page_type = FIL_PAGE_TYPE_ZBLOB;
mem_heap_t* heap;
int err;
z_stream d_stream;
d_stream.next_out = buf;
d_stream.avail_out = len;
d_stream.next_in = Z_NULL;
d_stream.avail_in = 0;
/* Zlib inflate needs 32 kilobytes for the default
window size, plus a few kilobytes for small objects. */
heap = mem_heap_create(40000);
page_zip_set_alloc(&d_stream, heap);
ut_ad(ut_is_2pow(zip_size)); ut_ad(ut_is_2pow(zip_size));
ut_ad(zip_size >= PAGE_ZIP_MIN_SIZE); ut_ad(zip_size >= PAGE_ZIP_MIN_SIZE);
ut_ad(zip_size <= UNIV_PAGE_SIZE); ut_ad(zip_size <= UNIV_PAGE_SIZE);
ut_ad(space_id); ut_ad(space_id);
err = inflateInit(&d_stream);
ut_a(err == Z_OK);
for (;;) { for (;;) {
buf_page_t* bpage; buf_page_t* bpage;
int err;
ulint next_page_no; ulint next_page_no;
/* There is no latch on bpage directly. Instead, /* There is no latch on bpage directly. Instead,
...@@ -4663,7 +4681,7 @@ btr_copy_zblob_prefix( ...@@ -4663,7 +4681,7 @@ btr_copy_zblob_prefix(
" compressed BLOB" " compressed BLOB"
" page %lu space %lu\n", " page %lu space %lu\n",
(ulong) page_no, (ulong) space_id); (ulong) page_no, (ulong) space_id);
return; goto func_exit;
} }
if (UNIV_UNLIKELY if (UNIV_UNLIKELY
...@@ -4689,13 +4707,13 @@ btr_copy_zblob_prefix( ...@@ -4689,13 +4707,13 @@ btr_copy_zblob_prefix(
offset += 4; offset += 4;
} }
d_stream->next_in = bpage->zip.data + offset; d_stream.next_in = bpage->zip.data + offset;
d_stream->avail_in = zip_size - offset; d_stream.avail_in = zip_size - offset;
err = inflate(d_stream, Z_NO_FLUSH); err = inflate(&d_stream, Z_NO_FLUSH);
switch (err) { switch (err) {
case Z_OK: case Z_OK:
if (!d_stream->avail_out) { if (!d_stream.avail_out) {
goto end_of_blob; goto end_of_blob;
} }
break; break;
...@@ -4712,13 +4730,13 @@ btr_copy_zblob_prefix( ...@@ -4712,13 +4730,13 @@ btr_copy_zblob_prefix(
" compressed BLOB" " compressed BLOB"
" page %lu space %lu returned %d (%s)\n", " page %lu space %lu returned %d (%s)\n",
(ulong) page_no, (ulong) space_id, (ulong) page_no, (ulong) space_id,
err, d_stream->msg); err, d_stream.msg);
case Z_BUF_ERROR: case Z_BUF_ERROR:
goto end_of_blob; goto end_of_blob;
} }
if (next_page_no == FIL_NULL) { if (next_page_no == FIL_NULL) {
if (!d_stream->avail_in) { if (!d_stream.avail_in) {
ut_print_timestamp(stderr); ut_print_timestamp(stderr);
fprintf(stderr, fprintf(stderr,
" InnoDB: unexpected end of" " InnoDB: unexpected end of"
...@@ -4727,7 +4745,7 @@ btr_copy_zblob_prefix( ...@@ -4727,7 +4745,7 @@ btr_copy_zblob_prefix(
(ulong) page_no, (ulong) page_no,
(ulong) space_id); (ulong) space_id);
} else { } else {
err = inflate(d_stream, Z_FINISH); err = inflate(&d_stream, Z_FINISH);
switch (err) { switch (err) {
case Z_STREAM_END: case Z_STREAM_END:
case Z_BUF_ERROR: case Z_BUF_ERROR:
...@@ -4739,7 +4757,7 @@ btr_copy_zblob_prefix( ...@@ -4739,7 +4757,7 @@ btr_copy_zblob_prefix(
end_of_blob: end_of_blob:
buf_page_release_zip(bpage); buf_page_release_zip(bpage);
return; goto func_exit;
} }
buf_page_release_zip(bpage); buf_page_release_zip(bpage);
...@@ -4751,6 +4769,12 @@ btr_copy_zblob_prefix( ...@@ -4751,6 +4769,12 @@ btr_copy_zblob_prefix(
offset = FIL_PAGE_NEXT; offset = FIL_PAGE_NEXT;
page_type = FIL_PAGE_TYPE_ZBLOB2; page_type = FIL_PAGE_TYPE_ZBLOB2;
} }
func_exit:
inflateEnd(&d_stream);
mem_heap_free(heap);
UNIV_MEM_ASSERT_RW(buf, d_stream.total_out);
return(d_stream.total_out);
} }
/*******************************************************************//** /*******************************************************************//**
...@@ -4776,28 +4800,8 @@ btr_copy_externally_stored_field_prefix_low( ...@@ -4776,28 +4800,8 @@ btr_copy_externally_stored_field_prefix_low(
} }
if (UNIV_UNLIKELY(zip_size)) { if (UNIV_UNLIKELY(zip_size)) {
int err; return(btr_copy_zblob_prefix(buf, len, zip_size,
z_stream d_stream; space_id, page_no, offset));
mem_heap_t* heap;
/* Zlib inflate needs 32 kilobytes for the default
window size, plus a few kilobytes for small objects. */
heap = mem_heap_create(40000);
page_zip_set_alloc(&d_stream, heap);
err = inflateInit(&d_stream);
ut_a(err == Z_OK);
d_stream.next_out = buf;
d_stream.avail_out = len;
d_stream.avail_in = 0;
btr_copy_zblob_prefix(&d_stream, zip_size,
space_id, page_no, offset);
inflateEnd(&d_stream);
mem_heap_free(heap);
UNIV_MEM_ASSERT_RW(buf, d_stream.total_out);
return(d_stream.total_out);
} else { } else {
return(btr_copy_blob_prefix(buf, len, space_id, return(btr_copy_blob_prefix(buf, len, space_id,
page_no, offset)); page_no, offset));
......
...@@ -1201,8 +1201,8 @@ btr_search_drop_page_hash_when_freed( ...@@ -1201,8 +1201,8 @@ btr_search_drop_page_hash_when_freed(
having to fear a deadlock. */ having to fear a deadlock. */
block = buf_page_get_gen(space, zip_size, page_no, RW_S_LATCH, NULL, block = buf_page_get_gen(space, zip_size, page_no, RW_S_LATCH, NULL,
BUF_GET_IF_IN_POOL, __FILE__, __LINE__, BUF_PEEK_IF_IN_POOL, __FILE__, __LINE__,
&mtr); &mtr);
/* Because the buffer pool mutex was released by /* Because the buffer pool mutex was released by
buf_page_peek_if_search_hashed(), it is possible that the buf_page_peek_if_search_hashed(), it is possible that the
block was removed from the buffer pool by another thread block was removed from the buffer pool by another thread
......
...@@ -1893,16 +1893,19 @@ buf_block_align( ...@@ -1893,16 +1893,19 @@ buf_block_align(
/* TODO: protect buf_pool->chunks with a mutex (it will /* TODO: protect buf_pool->chunks with a mutex (it will
currently remain constant after buf_pool_init()) */ currently remain constant after buf_pool_init()) */
for (chunk = buf_pool->chunks, i = buf_pool->n_chunks; i--; chunk++) { for (chunk = buf_pool->chunks, i = buf_pool->n_chunks; i--; chunk++) {
lint offs = ptr - chunk->blocks->frame; ulint offs;
if (UNIV_UNLIKELY(offs < 0)) { if (UNIV_UNLIKELY(ptr < chunk->blocks->frame)) {
continue; continue;
} }
/* else */
offs = ptr - chunk->blocks->frame;
offs >>= UNIV_PAGE_SIZE_SHIFT; offs >>= UNIV_PAGE_SIZE_SHIFT;
if (UNIV_LIKELY((ulint) offs < chunk->size)) { if (UNIV_LIKELY(offs < chunk->size)) {
buf_block_t* block = &chunk->blocks[offs]; buf_block_t* block = &chunk->blocks[offs];
/* The function buf_chunk_init() invokes /* The function buf_chunk_init() invokes
...@@ -2028,7 +2031,7 @@ buf_page_get_gen( ...@@ -2028,7 +2031,7 @@ buf_page_get_gen(
ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */ ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */
buf_block_t* guess, /*!< in: guessed block or NULL */ buf_block_t* guess, /*!< in: guessed block or NULL */
ulint mode, /*!< in: BUF_GET, BUF_GET_IF_IN_POOL, ulint mode, /*!< in: BUF_GET, BUF_GET_IF_IN_POOL,
BUF_GET_NO_LATCH */ BUF_PEEK_IF_IN_POOL, BUF_GET_NO_LATCH */
const char* file, /*!< in: file name */ const char* file, /*!< in: file name */
ulint line, /*!< in: line where called */ ulint line, /*!< in: line where called */
mtr_t* mtr) /*!< in: mini-transaction */ mtr_t* mtr) /*!< in: mini-transaction */
...@@ -2044,9 +2047,19 @@ buf_page_get_gen( ...@@ -2044,9 +2047,19 @@ buf_page_get_gen(
ut_ad((rw_latch == RW_S_LATCH) ut_ad((rw_latch == RW_S_LATCH)
|| (rw_latch == RW_X_LATCH) || (rw_latch == RW_X_LATCH)
|| (rw_latch == RW_NO_LATCH)); || (rw_latch == RW_NO_LATCH));
ut_ad((mode != BUF_GET_NO_LATCH) || (rw_latch == RW_NO_LATCH)); #ifdef UNIV_DEBUG
ut_ad((mode == BUF_GET) || (mode == BUF_GET_IF_IN_POOL) switch (mode) {
|| (mode == BUF_GET_NO_LATCH)); case BUF_GET_NO_LATCH:
ut_ad(rw_latch == RW_NO_LATCH);
break;
case BUF_GET:
case BUF_GET_IF_IN_POOL:
case BUF_PEEK_IF_IN_POOL:
break;
default:
ut_error;
}
#endif /* UNIV_DEBUG */
ut_ad(zip_size == fil_space_get_zip_size(space)); ut_ad(zip_size == fil_space_get_zip_size(space));
ut_ad(ut_is_2pow(zip_size)); ut_ad(ut_is_2pow(zip_size));
#ifndef UNIV_LOG_DEBUG #ifndef UNIV_LOG_DEBUG
...@@ -2088,7 +2101,8 @@ buf_page_get_gen( ...@@ -2088,7 +2101,8 @@ buf_page_get_gen(
buf_pool_mutex_exit(); buf_pool_mutex_exit();
if (mode == BUF_GET_IF_IN_POOL) { if (mode == BUF_GET_IF_IN_POOL
|| mode == BUF_PEEK_IF_IN_POOL) {
return(NULL); return(NULL);
} }
...@@ -2127,7 +2141,8 @@ buf_page_get_gen( ...@@ -2127,7 +2141,8 @@ buf_page_get_gen(
must_read = buf_block_get_io_fix(block) == BUF_IO_READ; must_read = buf_block_get_io_fix(block) == BUF_IO_READ;
if (must_read && mode == BUF_GET_IF_IN_POOL) { if (must_read && (mode == BUF_GET_IF_IN_POOL
|| mode == BUF_PEEK_IF_IN_POOL)) {
/* The page is only being read to buffer */ /* The page is only being read to buffer */
buf_pool_mutex_exit(); buf_pool_mutex_exit();
...@@ -2245,6 +2260,7 @@ buf_page_get_gen( ...@@ -2245,6 +2260,7 @@ buf_page_get_gen(
mutex_exit(&buf_pool_zip_mutex); mutex_exit(&buf_pool_zip_mutex);
buf_pool->n_pend_unzip++; buf_pool->n_pend_unzip++;
bpage->state = BUF_BLOCK_ZIP_FREE;
buf_buddy_free(bpage, sizeof *bpage); buf_buddy_free(bpage, sizeof *bpage);
buf_pool_mutex_exit(); buf_pool_mutex_exit();
...@@ -2321,7 +2337,9 @@ buf_page_get_gen( ...@@ -2321,7 +2337,9 @@ buf_page_get_gen(
buf_pool_mutex_exit(); buf_pool_mutex_exit();
buf_page_set_accessed_make_young(&block->page, access_time); if (UNIV_LIKELY(mode != BUF_PEEK_IF_IN_POOL)) {
buf_page_set_accessed_make_young(&block->page, access_time);
}
#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG #if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
ut_a(!block->page.file_page_was_freed); ut_a(!block->page.file_page_was_freed);
...@@ -2374,7 +2392,7 @@ buf_page_get_gen( ...@@ -2374,7 +2392,7 @@ buf_page_get_gen(
mtr_memo_push(mtr, block, fix_type); mtr_memo_push(mtr, block, fix_type);
if (!access_time) { if (UNIV_LIKELY(mode != BUF_PEEK_IF_IN_POOL) && !access_time) {
/* In the case of a first access, try to apply linear /* In the case of a first access, try to apply linear
read-ahead */ read-ahead */
...@@ -2923,6 +2941,7 @@ buf_page_init_for_read( ...@@ -2923,6 +2941,7 @@ buf_page_init_for_read(
&& UNIV_LIKELY_NULL(buf_page_hash_get(space, offset))) { && UNIV_LIKELY_NULL(buf_page_hash_get(space, offset))) {
/* The block was added by some other thread. */ /* The block was added by some other thread. */
bpage->state = BUF_BLOCK_ZIP_FREE;
buf_buddy_free(bpage, sizeof *bpage); buf_buddy_free(bpage, sizeof *bpage);
buf_buddy_free(data, zip_size); buf_buddy_free(data, zip_size);
......
...@@ -246,71 +246,75 @@ buf_LRU_drop_page_hash_for_tablespace( ...@@ -246,71 +246,75 @@ buf_LRU_drop_page_hash_for_tablespace(
page_arr = ut_malloc(sizeof(ulint) page_arr = ut_malloc(sizeof(ulint)
* BUF_LRU_DROP_SEARCH_HASH_SIZE); * BUF_LRU_DROP_SEARCH_HASH_SIZE);
buf_pool_mutex_enter(); buf_pool_mutex_enter();
num_entries = 0;
scan_again: scan_again:
num_entries = 0;
bpage = UT_LIST_GET_LAST(buf_pool->LRU); bpage = UT_LIST_GET_LAST(buf_pool->LRU);
while (bpage != NULL) { while (bpage != NULL) {
mutex_t* block_mutex = buf_page_get_mutex(bpage);
buf_page_t* prev_bpage; buf_page_t* prev_bpage;
ibool is_fixed;
mutex_enter(block_mutex);
prev_bpage = UT_LIST_GET_PREV(LRU, bpage); prev_bpage = UT_LIST_GET_PREV(LRU, bpage);
ut_a(buf_page_in_file(bpage)); ut_a(buf_page_in_file(bpage));
if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE
|| bpage->space != id || bpage->space != id
|| bpage->buf_fix_count > 0
|| bpage->io_fix != BUF_IO_NONE) { || bpage->io_fix != BUF_IO_NONE) {
/* We leave the fixed pages as is in this scan. /* Compressed pages are never hashed.
To be dealt with later in the final scan. */ Skip blocks of other tablespaces.
mutex_exit(block_mutex); Skip I/O-fixed blocks (to be dealt with later). */
goto next_page; next_page:
bpage = prev_bpage;
continue;
} }
if (((buf_block_t*) bpage)->is_hashed) { mutex_enter(&((buf_block_t*) bpage)->mutex);
is_fixed = bpage->buf_fix_count > 0
|| !((buf_block_t*) bpage)->is_hashed;
mutex_exit(&((buf_block_t*) bpage)->mutex);
/* Store the offset(i.e.: page_no) in the array if (is_fixed) {
so that we can drop hash index in a batch goto next_page;
later. */ }
page_arr[num_entries] = bpage->offset;
mutex_exit(block_mutex);
ut_a(num_entries < BUF_LRU_DROP_SEARCH_HASH_SIZE);
++num_entries;
if (num_entries < BUF_LRU_DROP_SEARCH_HASH_SIZE) { /* Store the page number so that we can drop the hash
goto next_page; index in a batch later. */
} page_arr[num_entries] = bpage->offset;
/* Array full. We release the buf_pool_mutex to ut_a(num_entries < BUF_LRU_DROP_SEARCH_HASH_SIZE);
obey the latching order. */ ++num_entries;
buf_pool_mutex_exit();
if (num_entries < BUF_LRU_DROP_SEARCH_HASH_SIZE) {
buf_LRU_drop_page_hash_batch(id, zip_size, page_arr, goto next_page;
num_entries);
num_entries = 0;
buf_pool_mutex_enter();
} else {
mutex_exit(block_mutex);
} }
next_page: /* Array full. We release the buf_pool_mutex to
/* Note that we may have released the buf_pool mutex obey the latching order. */
above after reading the prev_bpage during processing buf_pool_mutex_exit();
of a page_hash_batch (i.e.: when the array was full). buf_LRU_drop_page_hash_batch(id, zip_size, page_arr,
This means that prev_bpage can change in LRU list. num_entries);
This is OK because this function is a 'best effort' buf_pool_mutex_enter();
to drop as many search hash entries as possible and num_entries = 0;
it does not guarantee that ALL such entries will be
dropped. */ /* Note that we released the buf_pool mutex above
bpage = prev_bpage; after reading the prev_bpage during processing of a
page_hash_batch (i.e.: when the array was full).
Because prev_bpage could belong to a compressed-only
block, it may have been relocated, and thus the
pointer cannot be trusted. Because bpage is of type
buf_block_t, it is safe to dereference.
bpage can change in the LRU list. This is OK because
this function is a 'best effort' to drop as many
search hash entries as possible and it does not
guarantee that ALL such entries will be dropped. */
/* If, however, bpage has been removed from LRU list /* If, however, bpage has been removed from LRU list
to the free list then we should restart the scan. to the free list then we should restart the scan.
bpage->state is protected by buf_pool mutex. */ bpage->state is protected by buf_pool mutex. */
if (bpage && !buf_page_in_file(bpage)) { if (bpage
ut_a(num_entries == 0); && buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) {
goto scan_again; goto scan_again;
} }
} }
...@@ -1799,6 +1803,7 @@ buf_LRU_block_remove_hashed_page( ...@@ -1799,6 +1803,7 @@ buf_LRU_block_remove_hashed_page(
buf_pool_mutex_exit_forbid(); buf_pool_mutex_exit_forbid();
buf_buddy_free(bpage->zip.data, buf_buddy_free(bpage->zip.data,
page_zip_get_size(&bpage->zip)); page_zip_get_size(&bpage->zip));
bpage->state = BUF_BLOCK_ZIP_FREE;
buf_buddy_free(bpage, sizeof(*bpage)); buf_buddy_free(bpage, sizeof(*bpage));
buf_pool_mutex_exit_allow(); buf_pool_mutex_exit_allow();
UNIV_MEM_UNDESC(bpage); UNIV_MEM_UNDESC(bpage);
......
...@@ -41,6 +41,8 @@ Created 11/5/1995 Heikki Tuuri ...@@ -41,6 +41,8 @@ Created 11/5/1995 Heikki Tuuri
/* @{ */ /* @{ */
#define BUF_GET 10 /*!< get always */ #define BUF_GET 10 /*!< get always */
#define BUF_GET_IF_IN_POOL 11 /*!< get if in pool */ #define BUF_GET_IF_IN_POOL 11 /*!< get if in pool */
#define BUF_PEEK_IF_IN_POOL 12 /*!< get if in pool, do not make
the block young in the LRU list */
#define BUF_GET_NO_LATCH 14 /*!< get and bufferfix, but #define BUF_GET_NO_LATCH 14 /*!< get and bufferfix, but
set no latch; we have set no latch; we have
separated this case, because separated this case, because
...@@ -284,7 +286,7 @@ buf_page_get_gen( ...@@ -284,7 +286,7 @@ buf_page_get_gen(
ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */ ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */
buf_block_t* guess, /*!< in: guessed block or NULL */ buf_block_t* guess, /*!< in: guessed block or NULL */
ulint mode, /*!< in: BUF_GET, BUF_GET_IF_IN_POOL, ulint mode, /*!< in: BUF_GET, BUF_GET_IF_IN_POOL,
BUF_GET_NO_LATCH */ BUF_PEEK_IF_IN_POOL, BUF_GET_NO_LATCH */
const char* file, /*!< in: file name */ const char* file, /*!< in: file name */
ulint line, /*!< in: line where called */ ulint line, /*!< in: line where called */
mtr_t* mtr); /*!< in: mini-transaction */ mtr_t* mtr); /*!< in: mini-transaction */
......
...@@ -115,8 +115,11 @@ Prints warnings of long semaphore waits to stderr. ...@@ -115,8 +115,11 @@ Prints warnings of long semaphore waits to stderr.
@return TRUE if fatal semaphore wait threshold was exceeded */ @return TRUE if fatal semaphore wait threshold was exceeded */
UNIV_INTERN UNIV_INTERN
ibool ibool
sync_array_print_long_waits(void); sync_array_print_long_waits(
/*=============================*/ /*========================*/
os_thread_id_t* waiter, /*!< out: longest waiting thread */
const void** sema) /*!< out: longest-waited-for semaphore */
__attribute__((nonnull));
/********************************************************************//** /********************************************************************//**
Validates the integrity of the wait array. Checks Validates the integrity of the wait array. Checks
that the number of reserved cells equals the count variable. */ that the number of reserved cells equals the count variable. */
......
...@@ -653,13 +653,13 @@ page_zip_dir_encode( ...@@ -653,13 +653,13 @@ page_zip_dir_encode(
Allocate memory for zlib. */ Allocate memory for zlib. */
static static
void* void*
page_zip_malloc( page_zip_zalloc(
/*============*/ /*============*/
void* opaque, /*!< in/out: memory heap */ void* opaque, /*!< in/out: memory heap */
uInt items, /*!< in: number of items to allocate */ uInt items, /*!< in: number of items to allocate */
uInt size) /*!< in: size of an item in bytes */ uInt size) /*!< in: size of an item in bytes */
{ {
return(mem_heap_alloc(opaque, items * size)); return(mem_heap_zalloc(opaque, items * size));
} }
/**********************************************************************//** /**********************************************************************//**
...@@ -684,7 +684,7 @@ page_zip_set_alloc( ...@@ -684,7 +684,7 @@ page_zip_set_alloc(
{ {
z_stream* strm = stream; z_stream* strm = stream;
strm->zalloc = page_zip_malloc; strm->zalloc = page_zip_zalloc;
strm->zfree = page_zip_free; strm->zfree = page_zip_free;
strm->opaque = heap; strm->opaque = heap;
} }
...@@ -2912,19 +2912,18 @@ page_zip_decompress( ...@@ -2912,19 +2912,18 @@ page_zip_decompress(
page_zip_set_alloc(&d_stream, heap); page_zip_set_alloc(&d_stream, heap);
if (UNIV_UNLIKELY(inflateInit2(&d_stream, UNIV_PAGE_SIZE_SHIFT)
!= Z_OK)) {
ut_error;
}
d_stream.next_in = page_zip->data + PAGE_DATA; d_stream.next_in = page_zip->data + PAGE_DATA;
/* Subtract the space reserved for /* Subtract the space reserved for
the page header and the end marker of the modification log. */ the page header and the end marker of the modification log. */
d_stream.avail_in = page_zip_get_size(page_zip) - (PAGE_DATA + 1); d_stream.avail_in = page_zip_get_size(page_zip) - (PAGE_DATA + 1);
d_stream.next_out = page + PAGE_ZIP_START; d_stream.next_out = page + PAGE_ZIP_START;
d_stream.avail_out = UNIV_PAGE_SIZE - PAGE_ZIP_START; d_stream.avail_out = UNIV_PAGE_SIZE - PAGE_ZIP_START;
if (UNIV_UNLIKELY(inflateInit2(&d_stream, UNIV_PAGE_SIZE_SHIFT)
!= Z_OK)) {
ut_error;
}
/* Decode the zlib header and the index information. */ /* Decode the zlib header and the index information. */
if (UNIV_UNLIKELY(inflate(&d_stream, Z_BLOCK) != Z_OK)) { if (UNIV_UNLIKELY(inflate(&d_stream, Z_BLOCK) != Z_OK)) {
......
...@@ -2236,6 +2236,12 @@ srv_error_monitor_thread( ...@@ -2236,6 +2236,12 @@ srv_error_monitor_thread(
ulint fatal_cnt = 0; ulint fatal_cnt = 0;
ib_uint64_t old_lsn; ib_uint64_t old_lsn;
ib_uint64_t new_lsn; ib_uint64_t new_lsn;
/* longest waiting thread for a semaphore */
os_thread_id_t waiter = os_thread_get_curr_id();
os_thread_id_t old_waiter = waiter;
/* the semaphore that is being waited for */
const void* sema = NULL;
const void* old_sema = NULL;
old_lsn = srv_start_lsn; old_lsn = srv_start_lsn;
...@@ -2284,7 +2290,8 @@ srv_error_monitor_thread( ...@@ -2284,7 +2290,8 @@ srv_error_monitor_thread(
sync_arr_wake_threads_if_sema_free(); sync_arr_wake_threads_if_sema_free();
if (sync_array_print_long_waits()) { if (sync_array_print_long_waits(&waiter, &sema)
&& sema == old_sema && os_thread_eq(waiter, old_waiter)) {
fatal_cnt++; fatal_cnt++;
if (fatal_cnt > 10) { if (fatal_cnt > 10) {
...@@ -2299,6 +2306,8 @@ srv_error_monitor_thread( ...@@ -2299,6 +2306,8 @@ srv_error_monitor_thread(
} }
} else { } else {
fatal_cnt = 0; fatal_cnt = 0;
old_waiter = waiter;
old_sema = sema;
} }
/* Flush stderr so that a database user gets the output /* Flush stderr so that a database user gets the output
......
...@@ -914,8 +914,10 @@ Prints warnings of long semaphore waits to stderr. ...@@ -914,8 +914,10 @@ Prints warnings of long semaphore waits to stderr.
@return TRUE if fatal semaphore wait threshold was exceeded */ @return TRUE if fatal semaphore wait threshold was exceeded */
UNIV_INTERN UNIV_INTERN
ibool ibool
sync_array_print_long_waits(void) sync_array_print_long_waits(
/*=============================*/ /*========================*/
os_thread_id_t* waiter, /*!< out: longest waiting thread */
const void** sema) /*!< out: longest-waited-for semaphore */
{ {
sync_cell_t* cell; sync_cell_t* cell;
ibool old_val; ibool old_val;
...@@ -923,24 +925,40 @@ sync_array_print_long_waits(void) ...@@ -923,24 +925,40 @@ sync_array_print_long_waits(void)
ulint i; ulint i;
ulint fatal_timeout = srv_fatal_semaphore_wait_threshold; ulint fatal_timeout = srv_fatal_semaphore_wait_threshold;
ibool fatal = FALSE; ibool fatal = FALSE;
double longest_diff = 0;
for (i = 0; i < sync_primary_wait_array->n_cells; i++) { for (i = 0; i < sync_primary_wait_array->n_cells; i++) {
double diff;
void* wait_object;
cell = sync_array_get_nth_cell(sync_primary_wait_array, i); cell = sync_array_get_nth_cell(sync_primary_wait_array, i);
if (cell->wait_object != NULL && cell->waiting wait_object = cell->wait_object;
&& difftime(time(NULL), cell->reservation_time) > 240) {
if (wait_object == NULL || !cell->waiting) {
continue;
}
diff = difftime(time(NULL), cell->reservation_time);
if (diff > 240) {
fputs("InnoDB: Warning: a long semaphore wait:\n", fputs("InnoDB: Warning: a long semaphore wait:\n",
stderr); stderr);
sync_array_cell_print(stderr, cell); sync_array_cell_print(stderr, cell);
noticed = TRUE; noticed = TRUE;
} }
if (cell->wait_object != NULL && cell->waiting if (diff > fatal_timeout) {
&& difftime(time(NULL), cell->reservation_time)
> fatal_timeout) {
fatal = TRUE; fatal = TRUE;
} }
if (diff > longest_diff) {
longest_diff = diff;
*sema = wait_object;
*waiter = cell->thread;
}
} }
if (noticed) { if (noticed) {
......
...@@ -508,7 +508,7 @@ fill_trx_row( ...@@ -508,7 +508,7 @@ fill_trx_row(
query[stmt_len] = '\0'; query[stmt_len] = '\0';
row->trx_query = ha_storage_put_memlim( row->trx_query = ha_storage_put_memlim(
cache->storage, stmt, stmt_len + 1, cache->storage, query, stmt_len + 1,
MAX_ALLOWED_FOR_STORAGE(cache)); MAX_ALLOWED_FOR_STORAGE(cache));
row->trx_query_cs = innobase_get_charset(trx->mysql_thd); row->trx_query_cs = innobase_get_charset(trx->mysql_thd);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment