Commit 0f8ae318 authored by Marko Mäkelä's avatar Marko Mäkelä

Bug #58549 Race condition in buf_LRU_drop_page_hash_for_tablespace()

and compressed tables

buf_LRU_drop_page_hash_for_tablespace(): after releasing and
reacquiring the buffer pool mutex, do not dereference any block
descriptor pointer that is not known to be a pointer to an
uncompressed page frame (type buf_block_t; state ==
BUF_BLOCK_FILE_PAGE). Also, defer the acquisition of the block_mutex
until it is needed.

buf_page_get_gen(): Add mode == BUF_GET_IF_IN_POOL_PEEK for
buffer-fixing a block without making it young in the LRU list.

buf_page_get_gen(), buf_page_init(), buf_LRU_block_remove_hashed_page():
Set bpage->state = BUF_BLOCK_ZIP_FREE before buf_buddy_free(bpage),
so that similar race conditions might be detected a little easier.

btr_search_drop_page_hash_when_freed(): Use BUF_GET_IF_IN_POOL_PEEK
when dropping the hash indexes.

rb://528 approved by Jimmy Yang
parent 5a805fe7
2011-02-28 The InnoDB Team
* btr/btr0sea.c, buf/buf0buf.c, buf/buf0lru.c:
Fix Bug#58549 Race condition in buf_LRU_drop_page_hash_for_tablespace()
and compressed tables
2011-02-15 The InnoDB Team 2011-02-15 The InnoDB Team
* sync/sync0rw.c, innodb_bug59307.test: * sync/sync0rw.c, innodb_bug59307.test:
......
...@@ -1201,7 +1201,7 @@ btr_search_drop_page_hash_when_freed( ...@@ -1201,7 +1201,7 @@ btr_search_drop_page_hash_when_freed(
having to fear a deadlock. */ having to fear a deadlock. */
block = buf_page_get_gen(space, zip_size, page_no, RW_S_LATCH, NULL, block = buf_page_get_gen(space, zip_size, page_no, RW_S_LATCH, NULL,
BUF_GET_IF_IN_POOL, __FILE__, __LINE__, BUF_PEEK_IF_IN_POOL, __FILE__, __LINE__,
&mtr); &mtr);
/* Because the buffer pool mutex was released by /* Because the buffer pool mutex was released by
buf_page_peek_if_search_hashed(), it is possible that the buf_page_peek_if_search_hashed(), it is possible that the
......
...@@ -2031,7 +2031,7 @@ buf_page_get_gen( ...@@ -2031,7 +2031,7 @@ buf_page_get_gen(
ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */ ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */
buf_block_t* guess, /*!< in: guessed block or NULL */ buf_block_t* guess, /*!< in: guessed block or NULL */
ulint mode, /*!< in: BUF_GET, BUF_GET_IF_IN_POOL, ulint mode, /*!< in: BUF_GET, BUF_GET_IF_IN_POOL,
BUF_GET_NO_LATCH */ BUF_PEEK_IF_IN_POOL, BUF_GET_NO_LATCH */
const char* file, /*!< in: file name */ const char* file, /*!< in: file name */
ulint line, /*!< in: line where called */ ulint line, /*!< in: line where called */
mtr_t* mtr) /*!< in: mini-transaction */ mtr_t* mtr) /*!< in: mini-transaction */
...@@ -2047,9 +2047,19 @@ buf_page_get_gen( ...@@ -2047,9 +2047,19 @@ buf_page_get_gen(
ut_ad((rw_latch == RW_S_LATCH) ut_ad((rw_latch == RW_S_LATCH)
|| (rw_latch == RW_X_LATCH) || (rw_latch == RW_X_LATCH)
|| (rw_latch == RW_NO_LATCH)); || (rw_latch == RW_NO_LATCH));
ut_ad((mode != BUF_GET_NO_LATCH) || (rw_latch == RW_NO_LATCH)); #ifdef UNIV_DEBUG
ut_ad((mode == BUF_GET) || (mode == BUF_GET_IF_IN_POOL) switch (mode) {
|| (mode == BUF_GET_NO_LATCH)); case BUF_GET_NO_LATCH:
ut_ad(rw_latch == RW_NO_LATCH);
break;
case BUF_GET:
case BUF_GET_IF_IN_POOL:
case BUF_PEEK_IF_IN_POOL:
break;
default:
ut_error;
}
#endif /* UNIV_DEBUG */
ut_ad(zip_size == fil_space_get_zip_size(space)); ut_ad(zip_size == fil_space_get_zip_size(space));
ut_ad(ut_is_2pow(zip_size)); ut_ad(ut_is_2pow(zip_size));
#ifndef UNIV_LOG_DEBUG #ifndef UNIV_LOG_DEBUG
...@@ -2091,7 +2101,8 @@ loop2: ...@@ -2091,7 +2101,8 @@ loop2:
buf_pool_mutex_exit(); buf_pool_mutex_exit();
if (mode == BUF_GET_IF_IN_POOL) { if (mode == BUF_GET_IF_IN_POOL
|| mode == BUF_PEEK_IF_IN_POOL) {
return(NULL); return(NULL);
} }
...@@ -2130,7 +2141,8 @@ loop2: ...@@ -2130,7 +2141,8 @@ loop2:
must_read = buf_block_get_io_fix(block) == BUF_IO_READ; must_read = buf_block_get_io_fix(block) == BUF_IO_READ;
if (must_read && mode == BUF_GET_IF_IN_POOL) { if (must_read && (mode == BUF_GET_IF_IN_POOL
|| mode == BUF_PEEK_IF_IN_POOL)) {
/* The page is only being read to buffer */ /* The page is only being read to buffer */
buf_pool_mutex_exit(); buf_pool_mutex_exit();
...@@ -2248,6 +2260,7 @@ wait_until_unfixed: ...@@ -2248,6 +2260,7 @@ wait_until_unfixed:
mutex_exit(&buf_pool_zip_mutex); mutex_exit(&buf_pool_zip_mutex);
buf_pool->n_pend_unzip++; buf_pool->n_pend_unzip++;
bpage->state = BUF_BLOCK_ZIP_FREE;
buf_buddy_free(bpage, sizeof *bpage); buf_buddy_free(bpage, sizeof *bpage);
buf_pool_mutex_exit(); buf_pool_mutex_exit();
...@@ -2324,7 +2337,9 @@ wait_until_unfixed: ...@@ -2324,7 +2337,9 @@ wait_until_unfixed:
buf_pool_mutex_exit(); buf_pool_mutex_exit();
if (UNIV_LIKELY(mode != BUF_PEEK_IF_IN_POOL)) {
buf_page_set_accessed_make_young(&block->page, access_time); buf_page_set_accessed_make_young(&block->page, access_time);
}
#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG #if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
ut_a(!block->page.file_page_was_freed); ut_a(!block->page.file_page_was_freed);
...@@ -2377,7 +2392,7 @@ wait_until_unfixed: ...@@ -2377,7 +2392,7 @@ wait_until_unfixed:
mtr_memo_push(mtr, block, fix_type); mtr_memo_push(mtr, block, fix_type);
if (!access_time) { if (UNIV_LIKELY(mode != BUF_PEEK_IF_IN_POOL) && !access_time) {
/* In the case of a first access, try to apply linear /* In the case of a first access, try to apply linear
read-ahead */ read-ahead */
...@@ -2926,6 +2941,7 @@ err_exit: ...@@ -2926,6 +2941,7 @@ err_exit:
&& UNIV_LIKELY_NULL(buf_page_hash_get(space, offset))) { && UNIV_LIKELY_NULL(buf_page_hash_get(space, offset))) {
/* The block was added by some other thread. */ /* The block was added by some other thread. */
bpage->state = BUF_BLOCK_ZIP_FREE;
buf_buddy_free(bpage, sizeof *bpage); buf_buddy_free(bpage, sizeof *bpage);
buf_buddy_free(data, zip_size); buf_buddy_free(data, zip_size);
......
...@@ -246,71 +246,75 @@ buf_LRU_drop_page_hash_for_tablespace( ...@@ -246,71 +246,75 @@ buf_LRU_drop_page_hash_for_tablespace(
page_arr = ut_malloc(sizeof(ulint) page_arr = ut_malloc(sizeof(ulint)
* BUF_LRU_DROP_SEARCH_HASH_SIZE); * BUF_LRU_DROP_SEARCH_HASH_SIZE);
buf_pool_mutex_enter(); buf_pool_mutex_enter();
num_entries = 0;
scan_again: scan_again:
num_entries = 0;
bpage = UT_LIST_GET_LAST(buf_pool->LRU); bpage = UT_LIST_GET_LAST(buf_pool->LRU);
while (bpage != NULL) { while (bpage != NULL) {
mutex_t* block_mutex = buf_page_get_mutex(bpage);
buf_page_t* prev_bpage; buf_page_t* prev_bpage;
ibool is_fixed;
mutex_enter(block_mutex);
prev_bpage = UT_LIST_GET_PREV(LRU, bpage); prev_bpage = UT_LIST_GET_PREV(LRU, bpage);
ut_a(buf_page_in_file(bpage)); ut_a(buf_page_in_file(bpage));
if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE
|| bpage->space != id || bpage->space != id
|| bpage->buf_fix_count > 0
|| bpage->io_fix != BUF_IO_NONE) { || bpage->io_fix != BUF_IO_NONE) {
/* We leave the fixed pages as is in this scan. /* Compressed pages are never hashed.
To be dealt with later in the final scan. */ Skip blocks of other tablespaces.
mutex_exit(block_mutex); Skip I/O-fixed blocks (to be dealt with later). */
goto next_page; next_page:
bpage = prev_bpage;
continue;
} }
if (((buf_block_t*) bpage)->is_hashed) { mutex_enter(&((buf_block_t*) bpage)->mutex);
is_fixed = bpage->buf_fix_count > 0
|| !((buf_block_t*) bpage)->is_hashed;
mutex_exit(&((buf_block_t*) bpage)->mutex);
/* Store the offset(i.e.: page_no) in the array if (is_fixed) {
so that we can drop hash index in a batch goto next_page;
later. */ }
/* Store the page number so that we can drop the hash
index in a batch later. */
page_arr[num_entries] = bpage->offset; page_arr[num_entries] = bpage->offset;
mutex_exit(block_mutex);
ut_a(num_entries < BUF_LRU_DROP_SEARCH_HASH_SIZE); ut_a(num_entries < BUF_LRU_DROP_SEARCH_HASH_SIZE);
++num_entries; ++num_entries;
if (num_entries < BUF_LRU_DROP_SEARCH_HASH_SIZE) { if (num_entries < BUF_LRU_DROP_SEARCH_HASH_SIZE) {
goto next_page; goto next_page;
} }
/* Array full. We release the buf_pool_mutex to /* Array full. We release the buf_pool_mutex to
obey the latching order. */ obey the latching order. */
buf_pool_mutex_exit(); buf_pool_mutex_exit();
buf_LRU_drop_page_hash_batch(id, zip_size, page_arr, buf_LRU_drop_page_hash_batch(id, zip_size, page_arr,
num_entries); num_entries);
num_entries = 0;
buf_pool_mutex_enter(); buf_pool_mutex_enter();
} else { num_entries = 0;
mutex_exit(block_mutex);
}
next_page: /* Note that we released the buf_pool mutex above
/* Note that we may have released the buf_pool mutex after reading the prev_bpage during processing of a
above after reading the prev_bpage during processing page_hash_batch (i.e.: when the array was full).
of a page_hash_batch (i.e.: when the array was full). Because prev_bpage could belong to a compressed-only
This means that prev_bpage can change in LRU list. block, it may have been relocated, and thus the
This is OK because this function is a 'best effort' pointer cannot be trusted. Because bpage is of type
to drop as many search hash entries as possible and buf_block_t, it is safe to dereference.
it does not guarantee that ALL such entries will be
dropped. */ bpage can change in the LRU list. This is OK because
bpage = prev_bpage; this function is a 'best effort' to drop as many
search hash entries as possible and it does not
guarantee that ALL such entries will be dropped. */
/* If, however, bpage has been removed from LRU list /* If, however, bpage has been removed from LRU list
to the free list then we should restart the scan. to the free list then we should restart the scan.
bpage->state is protected by buf_pool mutex. */ bpage->state is protected by buf_pool mutex. */
if (bpage && !buf_page_in_file(bpage)) { if (bpage
ut_a(num_entries == 0); && buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) {
goto scan_again; goto scan_again;
} }
} }
...@@ -1799,6 +1803,7 @@ buf_LRU_block_remove_hashed_page( ...@@ -1799,6 +1803,7 @@ buf_LRU_block_remove_hashed_page(
buf_pool_mutex_exit_forbid(); buf_pool_mutex_exit_forbid();
buf_buddy_free(bpage->zip.data, buf_buddy_free(bpage->zip.data,
page_zip_get_size(&bpage->zip)); page_zip_get_size(&bpage->zip));
bpage->state = BUF_BLOCK_ZIP_FREE;
buf_buddy_free(bpage, sizeof(*bpage)); buf_buddy_free(bpage, sizeof(*bpage));
buf_pool_mutex_exit_allow(); buf_pool_mutex_exit_allow();
UNIV_MEM_UNDESC(bpage); UNIV_MEM_UNDESC(bpage);
......
...@@ -41,6 +41,8 @@ Created 11/5/1995 Heikki Tuuri ...@@ -41,6 +41,8 @@ Created 11/5/1995 Heikki Tuuri
/* @{ */ /* @{ */
#define BUF_GET 10 /*!< get always */ #define BUF_GET 10 /*!< get always */
#define BUF_GET_IF_IN_POOL 11 /*!< get if in pool */ #define BUF_GET_IF_IN_POOL 11 /*!< get if in pool */
#define BUF_PEEK_IF_IN_POOL 12 /*!< get if in pool, do not make
the block young in the LRU list */
#define BUF_GET_NO_LATCH 14 /*!< get and bufferfix, but #define BUF_GET_NO_LATCH 14 /*!< get and bufferfix, but
set no latch; we have set no latch; we have
separated this case, because separated this case, because
...@@ -284,7 +286,7 @@ buf_page_get_gen( ...@@ -284,7 +286,7 @@ buf_page_get_gen(
ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */ ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */
buf_block_t* guess, /*!< in: guessed block or NULL */ buf_block_t* guess, /*!< in: guessed block or NULL */
ulint mode, /*!< in: BUF_GET, BUF_GET_IF_IN_POOL, ulint mode, /*!< in: BUF_GET, BUF_GET_IF_IN_POOL,
BUF_GET_NO_LATCH */ BUF_PEEK_IF_IN_POOL, BUF_GET_NO_LATCH */
const char* file, /*!< in: file name */ const char* file, /*!< in: file name */
ulint line, /*!< in: line where called */ ulint line, /*!< in: line where called */
mtr_t* mtr); /*!< in: mini-transaction */ mtr_t* mtr); /*!< in: mini-transaction */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment