Commit e60e6505 authored by Inaam Rana's avatar Inaam Rana

Bug 12635227 - 61188: DROP TABLE EXTREMELY SLOW

approved by: Marko
rb://681

Coalescing of free buf_page_t descriptors can prove to be one severe
bottleneck in performance of compression. One such workload where it
hurts badly is DROP TABLE. This patch removes buf_page_t allocations
from buf_buddy and uses ut_malloc instead.
In order to further reduce overhead of colaescing we no longer attempt
to coalesce a block if the corresponding free_list is less than 16 in
size.
parent 7de029da
2011-06-16 The InnoDB Team
* btr/btr0cur.c, buf/buf0buddy.c, buf/buf0buf.c, buf/buf0lru.c,
include/buf0buddy.h, include/buf0buddy.ic, include/buf0buf.h,
include/buf0buf.ic, include/buf0lru.h, include/buf0types.h:
Fix Bug#61188 DROP TABLE extremely slow
2011-06-16 The InnoDB Team
* buf/buf0buddy.c, buf/buf0buf.c, buf/buf0flu.c, buf/buf0lru.c,
......
......@@ -3864,7 +3864,7 @@ btr_blob_free(
&& buf_block_get_space(block) == space
&& buf_block_get_page_no(block) == page_no) {
if (buf_LRU_free_block(&block->page, all) != BUF_LRU_FREED
if (!buf_LRU_free_block(&block->page, all)
&& all && block->page.zip.data) {
/* Attempt to deallocate the uncompressed page
if the whole block cannot be deallocted. */
......
......@@ -45,6 +45,14 @@ static ulint buf_buddy_n_frames;
Protected by buf_pool_mutex. */
UNIV_INTERN buf_buddy_stat_t buf_buddy_stat[BUF_BUDDY_SIZES + 1];
/** Validate a given zip_free list. */
#define BUF_BUDDY_LIST_VALIDATE(i) \
UT_LIST_VALIDATE(list, buf_page_t, \
buf_pool->zip_free[i], \
ut_ad(buf_page_get_state( \
ut_list_node_313) \
== BUF_BLOCK_ZIP_FREE))
/**********************************************************************//**
Get the offset of the buddy of a compressed page frame.
@return the buddy relative of page */
......@@ -76,21 +84,10 @@ buf_buddy_add_to_free(
buf_page_t* bpage, /*!< in,own: block to be freed */
ulint i) /*!< in: index of buf_pool->zip_free[] */
{
#ifdef UNIV_DEBUG_VALGRIND
buf_page_t* b = UT_LIST_GET_FIRST(buf_pool->zip_free[i]);
if (b) UNIV_MEM_VALID(b, BUF_BUDDY_LOW << i);
#endif /* UNIV_DEBUG_VALGRIND */
ut_ad(buf_pool_mutex_own());
ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
ut_ad(buf_pool->zip_free[i].start != bpage);
UT_LIST_ADD_FIRST(list, buf_pool->zip_free[i], bpage);
#ifdef UNIV_DEBUG_VALGRIND
if (b) UNIV_MEM_FREE(b, BUF_BUDDY_LOW << i);
UNIV_MEM_ASSERT_AND_FREE(bpage, BUF_BUDDY_LOW << i);
#endif /* UNIV_DEBUG_VALGRIND */
}
/**********************************************************************//**
......@@ -102,25 +99,17 @@ buf_buddy_remove_from_free(
buf_page_t* bpage, /*!< in: block to be removed */
ulint i) /*!< in: index of buf_pool->zip_free[] */
{
#ifdef UNIV_DEBUG_VALGRIND
#ifdef UNIV_DEBUG
buf_page_t* prev = UT_LIST_GET_PREV(list, bpage);
buf_page_t* next = UT_LIST_GET_NEXT(list, bpage);
if (prev) UNIV_MEM_VALID(prev, BUF_BUDDY_LOW << i);
if (next) UNIV_MEM_VALID(next, BUF_BUDDY_LOW << i);
ut_ad(!prev || buf_page_get_state(prev) == BUF_BLOCK_ZIP_FREE);
ut_ad(!next || buf_page_get_state(next) == BUF_BLOCK_ZIP_FREE);
#endif /* UNIV_DEBUG_VALGRIND */
#endif /* UNIV_DEBUG */
ut_ad(buf_pool_mutex_own());
ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
UT_LIST_REMOVE(list, buf_pool->zip_free[i], bpage);
#ifdef UNIV_DEBUG_VALGRIND
if (prev) UNIV_MEM_FREE(prev, BUF_BUDDY_LOW << i);
if (next) UNIV_MEM_FREE(next, BUF_BUDDY_LOW << i);
#endif /* UNIV_DEBUG_VALGRIND */
}
/**********************************************************************//**
......@@ -136,17 +125,13 @@ buf_buddy_alloc_zip(
ut_ad(buf_pool_mutex_own());
ut_a(i < BUF_BUDDY_SIZES);
ut_a(i >= buf_buddy_get_slot(PAGE_ZIP_MIN_SIZE));
ut_d(BUF_BUDDY_LIST_VALIDATE(i));
#ifndef UNIV_DEBUG_VALGRIND
/* Valgrind would complain about accessing free memory. */
ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
ut_ad(buf_page_get_state(ut_list_node_313)
== BUF_BLOCK_ZIP_FREE)));
#endif /* !UNIV_DEBUG_VALGRIND */
bpage = UT_LIST_GET_FIRST(buf_pool->zip_free[i]);
if (bpage) {
UNIV_MEM_VALID(bpage, BUF_BUDDY_LOW << i);
ut_a(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
buf_buddy_remove_from_free(bpage, i);
......@@ -165,13 +150,10 @@ buf_buddy_alloc_zip(
}
}
#ifdef UNIV_DEBUG
if (bpage) {
memset(bpage, ~i, BUF_BUDDY_LOW << i);
ut_d(memset(bpage, ~i, BUF_BUDDY_LOW << i));
UNIV_MEM_ALLOC(bpage, BUF_BUDDY_SIZES << i);
}
#endif /* UNIV_DEBUG */
UNIV_MEM_ALLOC(bpage, BUF_BUDDY_SIZES << i);
return(bpage);
}
......@@ -255,6 +237,7 @@ buf_buddy_alloc_from(
{
ulint offs = BUF_BUDDY_LOW << j;
ut_ad(j <= BUF_BUDDY_SIZES);
ut_ad(i >= buf_buddy_get_slot(PAGE_ZIP_MIN_SIZE));
ut_ad(j >= i);
ut_ad(!ut_align_offset(buf, offs));
......@@ -268,13 +251,7 @@ buf_buddy_alloc_from(
bpage = (buf_page_t*) ((byte*) buf + offs);
ut_d(memset(bpage, j, BUF_BUDDY_LOW << j));
bpage->state = BUF_BLOCK_ZIP_FREE;
#ifndef UNIV_DEBUG_VALGRIND
/* Valgrind would complain about accessing free memory. */
ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
ut_ad(buf_page_get_state(
ut_list_node_313)
== BUF_BLOCK_ZIP_FREE)));
#endif /* !UNIV_DEBUG_VALGRIND */
ut_d(BUF_BUDDY_LIST_VALIDATE(i));
buf_buddy_add_to_free(bpage, j);
}
......@@ -284,8 +261,8 @@ buf_buddy_alloc_from(
/**********************************************************************//**
Allocate a block. The thread calling this function must hold
buf_pool_mutex and must not hold buf_pool_zip_mutex or any block->mutex.
The buf_pool_mutex may only be released and reacquired if lru != NULL.
@return allocated block, possibly NULL if lru==NULL */
The buf_pool_mutex may be released and reacquired.
@return allocated block, never NULL */
UNIV_INTERN
void*
buf_buddy_alloc_low(
......@@ -294,13 +271,14 @@ buf_buddy_alloc_low(
or BUF_BUDDY_SIZES */
ibool* lru) /*!< in: pointer to a variable that will be assigned
TRUE if storage was allocated from the LRU list
and buf_pool_mutex was temporarily released,
or NULL if the LRU list should not be used */
and buf_pool_mutex was temporarily released */
{
buf_block_t* block;
ut_ad(lru);
ut_ad(buf_pool_mutex_own());
ut_ad(!mutex_own(&buf_pool_zip_mutex));
ut_ad(i >= buf_buddy_get_slot(PAGE_ZIP_MIN_SIZE));
if (i < BUF_BUDDY_SIZES) {
/* Try to allocate from the buddy system. */
......@@ -320,11 +298,6 @@ buf_buddy_alloc_low(
goto alloc_big;
}
if (!lru) {
return(NULL);
}
/* Try replacing an uncompressed page in the buffer pool. */
buf_pool_mutex_exit();
block = buf_LRU_get_free_block();
......@@ -341,65 +314,6 @@ func_exit:
return(block);
}
/**********************************************************************//**
Try to relocate the control block of a compressed page.
@return TRUE if relocated */
static
ibool
buf_buddy_relocate_block(
/*=====================*/
buf_page_t* bpage, /*!< in: block to relocate */
buf_page_t* dpage) /*!< in: free block to relocate to */
{
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
buf_page_t* b;
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
ut_ad(buf_pool_mutex_own());
switch (buf_page_get_state(bpage)) {
case BUF_BLOCK_ZIP_FREE:
case BUF_BLOCK_NOT_USED:
case BUF_BLOCK_READY_FOR_USE:
case BUF_BLOCK_FILE_PAGE:
case BUF_BLOCK_MEMORY:
case BUF_BLOCK_REMOVE_HASH:
ut_error;
case BUF_BLOCK_ZIP_DIRTY:
/* Cannot relocate dirty pages. */
return(FALSE);
case BUF_BLOCK_ZIP_PAGE:
break;
}
mutex_enter(&buf_pool_zip_mutex);
if (!buf_page_can_relocate(bpage)) {
mutex_exit(&buf_pool_zip_mutex);
return(FALSE);
}
buf_relocate(bpage, dpage);
ut_d(bpage->state = BUF_BLOCK_ZIP_FREE);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/* relocate buf_pool->zip_clean */
b = UT_LIST_GET_PREV(list, dpage);
UT_LIST_REMOVE(list, buf_pool->zip_clean, dpage);
if (b) {
UT_LIST_INSERT_AFTER(list, buf_pool->zip_clean, b, dpage);
} else {
UT_LIST_ADD_FIRST(list, buf_pool->zip_clean, dpage);
}
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
UNIV_MEM_INVALID(bpage, sizeof *bpage);
mutex_exit(&buf_pool_zip_mutex);
return(TRUE);
}
/**********************************************************************//**
Try to relocate a block.
@return TRUE if relocated */
......@@ -414,106 +328,89 @@ buf_buddy_relocate(
buf_page_t* bpage;
const ulint size = BUF_BUDDY_LOW << i;
ullint usec = ut_time_us(NULL);
mutex_t* mutex;
ulint space;
ulint page_no;
ut_ad(buf_pool_mutex_own());
ut_ad(!mutex_own(&buf_pool_zip_mutex));
ut_ad(!ut_align_offset(src, size));
ut_ad(!ut_align_offset(dst, size));
ut_ad(i >= buf_buddy_get_slot(PAGE_ZIP_MIN_SIZE));
UNIV_MEM_ASSERT_W(dst, size);
/* We assume that all memory from buf_buddy_alloc()
is used for either compressed pages or buf_page_t
objects covering compressed pages. */
is used for compressed page frames. */
/* We look inside the allocated objects returned by
buf_buddy_alloc() and assume that anything of
PAGE_ZIP_MIN_SIZE or larger is a compressed page that contains
a valid space_id and page_no in the page header. Should the
fields be invalid, we will be unable to relocate the block.
We also assume that anything that fits sizeof(buf_page_t)
actually is a properly initialized buf_page_t object. */
if (size >= PAGE_ZIP_MIN_SIZE) {
/* This is a compressed page. */
mutex_t* mutex;
/* The src block may be split into smaller blocks,
some of which may be free. Thus, the
mach_read_from_4() calls below may attempt to read
from free memory. The memory is "owned" by the buddy
allocator (and it has been allocated from the buffer
pool), so there is nothing wrong about this. The
mach_read_from_4() calls here will only trigger bogus
Valgrind memcheck warnings in UNIV_DEBUG_VALGRIND builds. */
ulint space = mach_read_from_4(
(const byte*) src + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
ulint page_no = mach_read_from_4(
(const byte*) src + FIL_PAGE_OFFSET);
/* Suppress Valgrind warnings about conditional jump
on uninitialized value. */
UNIV_MEM_VALID(&space, sizeof space);
UNIV_MEM_VALID(&page_no, sizeof page_no);
bpage = buf_page_hash_get(space, page_no);
if (!bpage || bpage->zip.data != src) {
/* The block has probably been freshly
allocated by buf_LRU_get_free_block() but not
added to buf_pool->page_hash yet. Obviously,
it cannot be relocated. */
return(FALSE);
}
buf_buddy_alloc() and assume that each block is a compressed
page that contains a valid space_id and page_no in the page
header. Should the fields be invalid, we will be unable to
relocate the block. */
/* The src block may be split into smaller blocks,
some of which may be free. Thus, the
mach_read_from_4() calls below may attempt to read
from free memory. The memory is "owned" by the buddy
allocator (and it has been allocated from the buffer
pool), so there is nothing wrong about this. The
mach_read_from_4() calls here will only trigger bogus
Valgrind memcheck warnings in UNIV_DEBUG_VALGRIND builds. */
space = mach_read_from_4((const byte *) src
+ FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
page_no = mach_read_from_4((const byte *) src
+ FIL_PAGE_OFFSET);
/* Suppress Valgrind warnings about conditional jump
on uninitialized value. */
UNIV_MEM_VALID(&space, sizeof space);
UNIV_MEM_VALID(&page_no, sizeof page_no);
bpage = buf_page_hash_get(space, page_no);
if (!bpage || bpage->zip.data != src) {
/* The block has probably been freshly
allocated by buf_LRU_get_free_block() but not
added to buf_pool->page_hash yet. Obviously,
it cannot be relocated. */
if (page_zip_get_size(&bpage->zip) != size) {
/* The block is of different size. We would
have to relocate all blocks covered by src.
For the sake of simplicity, give up. */
ut_ad(page_zip_get_size(&bpage->zip) < size);
return(FALSE);
}
return(FALSE);
}
if (page_zip_get_size(&bpage->zip) != size) {
/* The block is of different size. We would
have to relocate all blocks covered by src.
For the sake of simplicity, give up. */
ut_ad(page_zip_get_size(&bpage->zip) < size);
/* The block must have been allocated, but it may
contain uninitialized data. */
UNIV_MEM_ASSERT_W(src, size);
mutex = buf_page_get_mutex(bpage);
mutex_enter(mutex);
if (buf_page_can_relocate(bpage)) {
/* Relocate the compressed page. */
ut_a(bpage->zip.data == src);
memcpy(dst, src, size);
bpage->zip.data = dst;
mutex_exit(mutex);
success:
UNIV_MEM_INVALID(src, size);
{
buf_buddy_stat_t* buddy_stat
= &buf_buddy_stat[i];
buddy_stat->relocated++;
buddy_stat->relocated_usec
+= ut_time_us(NULL) - usec;
}
return(TRUE);
}
return(FALSE);
}
mutex_exit(mutex);
} else if (i == buf_buddy_get_slot(sizeof(buf_page_t))) {
/* This must be a buf_page_t object. */
#if UNIV_WORD_SIZE == 4
/* On 32-bit systems, there is no padding in
buf_page_t. On other systems, Valgrind could complain
about uninitialized pad bytes. */
UNIV_MEM_ASSERT_RW(src, size);
#endif
if (buf_buddy_relocate_block(src, dst)) {
/* The block must have been allocated, but it may
contain uninitialized data. */
UNIV_MEM_ASSERT_W(src, size);
mutex = buf_page_get_mutex(bpage);
mutex_enter(mutex);
goto success;
if (buf_page_can_relocate(bpage)) {
/* Relocate the compressed page. */
ut_a(bpage->zip.data == src);
memcpy(dst, src, size);
bpage->zip.data = dst;
mutex_exit(mutex);
UNIV_MEM_INVALID(src, size);
{
buf_buddy_stat_t* buddy_stat
= &buf_buddy_stat[i];
buddy_stat->relocated++;
buddy_stat->relocated_usec
+= ut_time_us(NULL) - usec;
}
return(TRUE);
}
mutex_exit(mutex);
return(FALSE);
}
......@@ -534,12 +431,14 @@ buf_buddy_free_low(
ut_ad(buf_pool_mutex_own());
ut_ad(!mutex_own(&buf_pool_zip_mutex));
ut_ad(i <= BUF_BUDDY_SIZES);
ut_ad(i >= buf_buddy_get_slot(PAGE_ZIP_MIN_SIZE));
ut_ad(buf_buddy_stat[i].used > 0);
buf_buddy_stat[i].used--;
recombine:
UNIV_MEM_ASSERT_AND_ALLOC(buf, BUF_BUDDY_LOW << i);
ut_d(((buf_page_t*) buf)->state = BUF_BLOCK_ZIP_FREE);
((buf_page_t*) buf)->state = BUF_BLOCK_ZIP_FREE;
if (i == BUF_BUDDY_SIZES) {
buf_buddy_block_free(buf);
......@@ -550,32 +449,36 @@ recombine:
ut_ad(buf == ut_align_down(buf, BUF_BUDDY_LOW << i));
ut_ad(!buf_pool_contains_zip(buf));
/* Try to combine adjacent blocks. */
/* Do not recombine blocks if there are few free blocks.
We may waste up to 15360*max_len bytes to free blocks
(1024 + 2048 + 4096 + 8192 = 15360) */
if (UT_LIST_GET_LEN(buf_pool->zip_free[i]) < 16) {
goto func_exit;
}
/* Try to combine adjacent blocks. */
buddy = (buf_page_t*) buf_buddy_get(((byte*) buf), BUF_BUDDY_LOW << i);
#ifndef UNIV_DEBUG_VALGRIND
/* Valgrind would complain about accessing free memory. */
/* When Valgrind instrumentation is not enabled, we can read
buddy->state to quickly determine that a block is not free.
When the block is not free, buddy->state belongs to a compressed
page frame that may be flagged uninitialized in our Valgrind
instrumentation. */
if (buddy->state != BUF_BLOCK_ZIP_FREE) {
goto buddy_nonfree;
}
/* The field buddy->state can only be trusted for free blocks.
If buddy->state == BUF_BLOCK_ZIP_FREE, the block is free if
it is in the free list. */
#endif /* !UNIV_DEBUG_VALGRIND */
for (bpage = UT_LIST_GET_FIRST(buf_pool->zip_free[i]); bpage; ) {
UNIV_MEM_VALID(bpage, BUF_BUDDY_LOW << i);
ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
if (bpage == buddy) {
buddy_free:
/* The buddy is free: recombine */
buf_buddy_remove_from_free(bpage, i);
buddy_free2:
buddy_is_free:
ut_ad(buf_page_get_state(buddy) == BUF_BLOCK_ZIP_FREE);
ut_ad(!buf_pool_contains_zip(buddy));
i++;
......@@ -585,122 +488,43 @@ buddy_free2:
}
ut_a(bpage != buf);
{
buf_page_t* next = UT_LIST_GET_NEXT(list, bpage);
UNIV_MEM_ASSERT_AND_FREE(bpage, BUF_BUDDY_LOW << i);
bpage = next;
}
UNIV_MEM_ASSERT_W(bpage, BUF_BUDDY_LOW << i);
bpage = UT_LIST_GET_NEXT(list, bpage);
}
#ifndef UNIV_DEBUG_VALGRIND
buddy_nonfree:
/* Valgrind would complain about accessing free memory. */
ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
ut_ad(buf_page_get_state(ut_list_node_313)
== BUF_BLOCK_ZIP_FREE)));
#endif /* UNIV_DEBUG_VALGRIND */
#endif /* !UNIV_DEBUG_VALGRIND */
ut_d(BUF_BUDDY_LIST_VALIDATE(i));
/* The buddy is not free. Is there a free block of this size? */
bpage = UT_LIST_GET_FIRST(buf_pool->zip_free[i]);
if (bpage) {
/* Remove the block from the free list, because a successful
buf_buddy_relocate() will overwrite bpage->list. */
UNIV_MEM_VALID(bpage, BUF_BUDDY_LOW << i);
buf_buddy_remove_from_free(bpage, i);
/* Try to relocate the buddy of buf to the free block. */
if (buf_buddy_relocate(buddy, bpage, i)) {
ut_d(buddy->state = BUF_BLOCK_ZIP_FREE);
goto buddy_free2;
buddy->state = BUF_BLOCK_ZIP_FREE;
goto buddy_is_free;
}
buf_buddy_add_to_free(bpage, i);
/* Try to relocate the buddy of the free block to buf. */
buddy = (buf_page_t*) buf_buddy_get(((byte*) bpage),
BUF_BUDDY_LOW << i);
#ifndef UNIV_DEBUG_VALGRIND
/* Valgrind would complain about accessing free memory. */
/* The buddy must not be (completely) free, because we
always recombine adjacent free blocks.
(Parts of the buddy can be free in
buf_pool->zip_free[j] with j < i.) */
ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
ut_ad(buf_page_get_state(
ut_list_node_313)
== BUF_BLOCK_ZIP_FREE
&& ut_list_node_313 != buddy)));
#endif /* !UNIV_DEBUG_VALGRIND */
if (buf_buddy_relocate(buddy, buf, i)) {
buf = bpage;
UNIV_MEM_VALID(bpage, BUF_BUDDY_LOW << i);
ut_d(buddy->state = BUF_BLOCK_ZIP_FREE);
goto buddy_free;
}
}
func_exit:
/* Free the block to the buddy list. */
bpage = buf;
#ifdef UNIV_DEBUG
if (i < buf_buddy_get_slot(PAGE_ZIP_MIN_SIZE)) {
/* This area has most likely been allocated for at
least one compressed-only block descriptor. Check
that there are no live objects in the area. This is
not a complete check: it may yield false positives as
well as false negatives. Also, due to buddy blocks
being recombined, it is possible (although unlikely)
that this branch is never reached. */
char* c;
# ifndef UNIV_DEBUG_VALGRIND
/* Valgrind would complain about accessing
uninitialized memory. Besides, Valgrind performs a
more exhaustive check, at every memory access. */
const buf_page_t* b = buf;
const buf_page_t* const b_end = (buf_page_t*)
((char*) b + (BUF_BUDDY_LOW << i));
for (; b < b_end; b++) {
/* Avoid false positives (and cause false
negatives) by checking for b->space < 1000. */
if ((b->state == BUF_BLOCK_ZIP_PAGE
|| b->state == BUF_BLOCK_ZIP_DIRTY)
&& b->space > 0 && b->space < 1000) {
fprintf(stderr,
"buddy dirty %p %u (%u,%u) %p,%lu\n",
(void*) b,
b->state, b->space, b->offset,
buf, i);
}
}
# endif /* !UNIV_DEBUG_VALGRIND */
/* Scramble the block. This should make any pointers
invalid and trigger a segmentation violation. Because
the scrambling can be reversed, it may be possible to
track down the object pointing to the freed data by
dereferencing the unscrambled bpage->LRU or
bpage->list pointers. */
for (c = (char*) buf + (BUF_BUDDY_LOW << i);
c-- > (char*) buf; ) {
*c = ~*c ^ i;
}
} else {
/* Fill large blocks with a constant pattern. */
memset(bpage, i, BUF_BUDDY_LOW << i);
}
#endif /* UNIV_DEBUG */
/* Fill large blocks with a constant pattern. */
ut_d(memset(bpage, i, BUF_BUDDY_LOW << i));
UNIV_MEM_INVALID(bpage, BUF_BUDDY_LOW << i);
bpage->state = BUF_BLOCK_ZIP_FREE;
buf_buddy_add_to_free(bpage, i);
}
......@@ -1358,7 +1358,7 @@ err_exit:
mutex_enter(block_mutex);
/* Discard the uncompressed page frame if possible. */
if (buf_LRU_free_block(bpage, FALSE) == BUF_LRU_FREED) {
if (buf_LRU_free_block(bpage, FALSE)) {
mutex_exit(block_mutex);
goto lookup;
......@@ -1699,13 +1699,8 @@ loop:
if (block) {
/* If the guess is a compressed page descriptor that
has been allocated by buf_buddy_alloc(), it may have
been invalidated by buf_buddy_relocate(). In that
case, block could point to something that happens to
contain the expected bits in block->page. Similarly,
the guess may be pointing to a buffer pool chunk that
has been released when resizing the buffer pool. */
has been allocated by buf_page_alloc_descriptor(),
it may have been freed by buf_relocate(). */
if (!buf_block_is_uncompressed(block)
|| offset != block->page.offset
|| space != block->page.space
......@@ -1889,11 +1884,10 @@ wait_until_unfixed:
mutex_exit(&buf_pool_zip_mutex);
buf_pool->n_pend_unzip++;
bpage->state = BUF_BLOCK_ZIP_FREE;
buf_buddy_free(bpage, sizeof *bpage);
buf_pool_mutex_exit();
buf_page_free_descriptor(bpage);
/* Decompress the page and apply buffered operations
while not holding buf_pool_mutex or block->mutex. */
success = buf_zip_decompress(block, srv_use_checksums);
......@@ -1937,7 +1931,7 @@ wait_until_unfixed:
/* Try to evict the block from the buffer pool, to use the
insert buffer as much as possible. */
if (buf_LRU_free_block(&block->page, TRUE) == BUF_LRU_FREED) {
if (buf_LRU_free_block(&block->page, TRUE)) {
buf_pool_mutex_exit();
mutex_exit(&block->mutex);
fprintf(stderr,
......@@ -2551,17 +2545,12 @@ err_exit:
mutex_exit(&block->mutex);
} else {
/* Defer buf_buddy_alloc() until after the block has
been found not to exist. The buf_buddy_alloc() and
buf_buddy_free() calls may be expensive because of
buf_buddy_relocate(). */
/* The compressed page must be allocated before the
control block (bpage), in order to avoid the
invocation of buf_buddy_relocate_block() on
uninitialized data. */
data = buf_buddy_alloc(zip_size, &lru);
bpage = buf_buddy_alloc(sizeof *bpage, &lru);
/* If buf_buddy_alloc() allocated storage from the LRU list,
it released and reacquired buf_pool_mutex. Thus, we must
......@@ -2569,15 +2558,13 @@ err_exit:
if (UNIV_UNLIKELY(lru)
&& UNIV_LIKELY_NULL(buf_page_hash_get(space, offset))) {
/* The block was added by some other thread. */
bpage->state = BUF_BLOCK_ZIP_FREE;
buf_buddy_free(bpage, sizeof *bpage);
buf_buddy_free(data, zip_size);
bpage = NULL;
goto func_exit;
}
bpage = buf_page_alloc_descriptor();
page_zip_des_init(&bpage->zip);
page_zip_set_size(&bpage->zip, zip_size);
bpage->zip.data = data;
......
......@@ -355,7 +355,7 @@ scan_again:
while (bpage != NULL) {
buf_page_t* prev_bpage;
ibool prev_bpage_buf_fix = FALSE;
mutex_t* block_mutex = NULL;
ut_a(buf_page_in_file(bpage));
......@@ -368,18 +368,21 @@ scan_again:
if (buf_page_get_space(bpage) != id) {
/* Skip this block, as it does not belong to
the space that is being invalidated. */
goto next_page;
} else if (buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
/* We cannot remove this page during this scan
yet; maybe the system is currently reading it
in, or flushing the modifications to the file */
all_freed = FALSE;
goto next_page;
} else {
mutex_t* block_mutex = buf_page_get_mutex(bpage);
block_mutex = buf_page_get_mutex(bpage);
mutex_enter(block_mutex);
if (bpage->buf_fix_count > 0) {
mutex_exit(block_mutex);
/* We cannot remove this page during
this scan yet; maybe the system is
currently reading it in, or flushing
......@@ -389,106 +392,59 @@ scan_again:
goto next_page;
}
}
ut_ad(mutex_own(block_mutex));
#ifdef UNIV_DEBUG
if (buf_debug_prints) {
fprintf(stderr,
"Dropping space %lu page %lu\n",
(ulong) buf_page_get_space(bpage),
(ulong) buf_page_get_page_no(bpage));
}
if (buf_debug_prints) {
fprintf(stderr,
"Dropping space %lu page %lu\n",
(ulong) buf_page_get_space(bpage),
(ulong) buf_page_get_page_no(bpage));
}
#endif
if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) {
/* This is a compressed-only block
descriptor. Ensure that prev_bpage
cannot be relocated when bpage is freed. */
if (UNIV_LIKELY(prev_bpage != NULL)) {
switch (buf_page_get_state(
prev_bpage)) {
case BUF_BLOCK_FILE_PAGE:
/* Descriptors of uncompressed
blocks will not be relocated,
because we are holding the
buf_pool_mutex. */
break;
case BUF_BLOCK_ZIP_PAGE:
case BUF_BLOCK_ZIP_DIRTY:
/* Descriptors of compressed-
only blocks can be relocated,
unless they are buffer-fixed.
Because both bpage and
prev_bpage are protected by
buf_pool_zip_mutex, it is
not necessary to acquire
further mutexes. */
ut_ad(&buf_pool_zip_mutex
== block_mutex);
ut_ad(mutex_own(block_mutex));
prev_bpage_buf_fix = TRUE;
prev_bpage->buf_fix_count++;
break;
default:
ut_error;
}
}
} else if (((buf_block_t*) bpage)->is_hashed) {
ulint page_no;
ulint zip_size;
buf_pool_mutex_exit();
if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) {
/* This is a compressed-only block
descriptor. Do nothing. */
} else if (((buf_block_t*) bpage)->is_hashed) {
ulint page_no;
ulint zip_size;
zip_size = buf_page_get_zip_size(bpage);
page_no = buf_page_get_page_no(bpage);
buf_pool_mutex_exit();
mutex_exit(block_mutex);
zip_size = buf_page_get_zip_size(bpage);
page_no = buf_page_get_page_no(bpage);
/* Note that the following call will acquire
an S-latch on the page */
mutex_exit(block_mutex);
btr_search_drop_page_hash_when_freed(
id, zip_size, page_no);
goto scan_again;
}
/* Note that the following call will acquire
an S-latch on the page */
if (bpage->oldest_modification != 0) {
btr_search_drop_page_hash_when_freed(
id, zip_size, page_no);
goto scan_again;
}
buf_flush_remove(bpage);
}
if (bpage->oldest_modification != 0) {
/* Remove from the LRU list. */
buf_flush_remove(bpage);
}
if (buf_LRU_block_remove_hashed_page(bpage, TRUE)
!= BUF_BLOCK_ZIP_FREE) {
buf_LRU_block_free_hashed_page((buf_block_t*)
bpage);
} else {
/* The block_mutex should have been
released by buf_LRU_block_remove_hashed_page()
when it returns BUF_BLOCK_ZIP_FREE. */
ut_ad(block_mutex == &buf_pool_zip_mutex);
ut_ad(!mutex_own(block_mutex));
if (prev_bpage_buf_fix) {
/* We temporarily buffer-fixed
prev_bpage, so that
buf_buddy_free() could not
relocate it, in case it was a
compressed-only block
descriptor. */
mutex_enter(block_mutex);
ut_ad(prev_bpage->buf_fix_count > 0);
prev_bpage->buf_fix_count--;
mutex_exit(block_mutex);
}
/* Remove from the LRU list. */
goto next_page_no_mutex;
}
next_page:
if (buf_LRU_block_remove_hashed_page(bpage, TRUE)
!= BUF_BLOCK_ZIP_FREE) {
buf_LRU_block_free_hashed_page((buf_block_t*) bpage);
mutex_exit(block_mutex);
} else {
/* The block_mutex should have been released
by buf_LRU_block_remove_hashed_page() when it
returns BUF_BLOCK_ZIP_FREE. */
ut_ad(block_mutex == &buf_pool_zip_mutex);
ut_ad(!mutex_own(block_mutex));
}
next_page_no_mutex:
next_page:
bpage = prev_bpage;
}
......@@ -574,7 +530,7 @@ buf_LRU_free_from_unzip_LRU_list(
UNIV_LIKELY(block != NULL) && UNIV_LIKELY(distance > 0);
block = UT_LIST_GET_PREV(unzip_LRU, block), distance--) {
enum buf_lru_free_block_status freed;
ibool freed;
ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
ut_ad(block->in_unzip_LRU_list);
......@@ -584,24 +540,9 @@ buf_LRU_free_from_unzip_LRU_list(
freed = buf_LRU_free_block(&block->page, FALSE);
mutex_exit(&block->mutex);
switch (freed) {
case BUF_LRU_FREED:
if (freed) {
return(TRUE);
case BUF_LRU_CANNOT_RELOCATE:
/* If we failed to relocate, try
regular LRU eviction. */
return(FALSE);
case BUF_LRU_NOT_FREED:
/* The block was buffer-fixed or I/O-fixed.
Keep looking. */
continue;
}
/* inappropriate return value from
buf_LRU_free_block() */
ut_error;
}
return(FALSE);
......@@ -632,10 +573,9 @@ buf_LRU_free_from_common_LRU_list(
UNIV_LIKELY(bpage != NULL) && UNIV_LIKELY(distance > 0);
bpage = UT_LIST_GET_PREV(LRU, bpage), distance--) {
enum buf_lru_free_block_status freed;
unsigned accessed;
mutex_t* block_mutex
= buf_page_get_mutex(bpage);
ibool freed;
unsigned accessed;
mutex_t* block_mutex = buf_page_get_mutex(bpage);
ut_ad(buf_page_in_file(bpage));
ut_ad(bpage->in_LRU_list);
......@@ -645,8 +585,7 @@ buf_LRU_free_from_common_LRU_list(
freed = buf_LRU_free_block(bpage, TRUE);
mutex_exit(block_mutex);
switch (freed) {
case BUF_LRU_FREED:
if (freed) {
/* Keep track of pages that are evicted without
ever being accessed. This gives us a measure of
the effectiveness of readahead */
......@@ -654,21 +593,7 @@ buf_LRU_free_from_common_LRU_list(
++buf_pool->stat.n_ra_pages_evicted;
}
return(TRUE);
case BUF_LRU_NOT_FREED:
/* The block was dirty, buffer-fixed, or I/O-fixed.
Keep looking. */
continue;
case BUF_LRU_CANNOT_RELOCATE:
/* This should never occur, because we
want to discard the compressed page too. */
break;
}
/* inappropriate return value from
buf_LRU_free_block() */
ut_error;
}
return(FALSE);
......@@ -1350,17 +1275,16 @@ buf_LRU_make_block_old(
Try to free a block. If bpage is a descriptor of a compressed-only
page, the descriptor object will be freed as well.
NOTE: If this function returns BUF_LRU_FREED, it will temporarily
NOTE: If this function returns TRUE, it will temporarily
release buf_pool_mutex. Furthermore, the page frame will no longer be
accessible via bpage.
The caller must hold buf_pool_mutex and buf_page_get_mutex(bpage) and
release these two mutexes after the call. No other
buf_page_get_mutex() may be held when calling this function.
@return BUF_LRU_FREED if freed, BUF_LRU_CANNOT_RELOCATE or
BUF_LRU_NOT_FREED otherwise. */
@return TRUE if freed, FALSE otherwise. */
UNIV_INTERN
enum buf_lru_free_block_status
ibool
buf_LRU_free_block(
/*===============*/
buf_page_t* bpage, /*!< in: block to be freed */
......@@ -1385,7 +1309,7 @@ buf_LRU_free_block(
if (!buf_page_can_relocate(bpage)) {
/* Do not free buffer-fixed or I/O-fixed blocks. */
return(BUF_LRU_NOT_FREED);
return(FALSE);
}
#ifdef UNIV_IBUF_COUNT_DEBUG
......@@ -1397,7 +1321,7 @@ buf_LRU_free_block(
/* Do not completely free dirty blocks. */
if (bpage->oldest_modification) {
return(BUF_LRU_NOT_FREED);
return(FALSE);
}
} else if (bpage->oldest_modification) {
/* Do not completely free dirty blocks. */
......@@ -1405,7 +1329,7 @@ buf_LRU_free_block(
if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) {
ut_ad(buf_page_get_state(bpage)
== BUF_BLOCK_ZIP_DIRTY);
return(BUF_LRU_NOT_FREED);
return(FALSE);
}
goto alloc;
......@@ -1414,14 +1338,8 @@ buf_LRU_free_block(
If it cannot be allocated (without freeing a block
from the LRU list), refuse to free bpage. */
alloc:
buf_pool_mutex_exit_forbid();
b = buf_buddy_alloc(sizeof *b, NULL);
buf_pool_mutex_exit_allow();
if (UNIV_UNLIKELY(!b)) {
return(BUF_LRU_CANNOT_RELOCATE);
}
b = buf_page_alloc_descriptor();
ut_a(b);
memcpy(b, bpage, sizeof *b);
}
......@@ -1589,7 +1507,7 @@ alloc:
mutex_enter(block_mutex);
}
return(BUF_LRU_FREED);
return(TRUE);
}
/******************************************************************//**
......@@ -1809,10 +1727,8 @@ buf_LRU_block_remove_hashed_page(
buf_pool_mutex_exit_forbid();
buf_buddy_free(bpage->zip.data,
page_zip_get_size(&bpage->zip));
bpage->state = BUF_BLOCK_ZIP_FREE;
buf_buddy_free(bpage, sizeof(*bpage));
buf_pool_mutex_exit_allow();
UNIV_MEM_UNDESC(bpage);
buf_page_free_descriptor(bpage);
return(BUF_BLOCK_ZIP_FREE);
case BUF_BLOCK_FILE_PAGE:
......
/*****************************************************************************
Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved.
Copyright (c) 2006, 2011, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
......@@ -37,24 +37,19 @@ Created December 2006 by Marko Makela
/**********************************************************************//**
Allocate a block. The thread calling this function must hold
buf_pool_mutex and must not hold buf_pool_zip_mutex or any
block->mutex. The buf_pool_mutex may only be released and reacquired
if lru != NULL. This function should only be used for allocating
compressed page frames or control blocks (buf_page_t). Allocated
control blocks must be properly initialized immediately after
buf_buddy_alloc() has returned the memory, before releasing
buf_pool_mutex.
@return allocated block, possibly NULL if lru == NULL */
block->mutex. The buf_pool_mutex may be released and reacquired.
This function should only be used for allocating compressed page frames.
@return allocated block, never NULL */
UNIV_INLINE
void*
buf_buddy_alloc(
/*============*/
ulint size, /*!< in: block size, up to UNIV_PAGE_SIZE */
ulint size, /*!< in: compressed page size
(between PAGE_ZIP_MIN_SIZE and UNIV_PAGE_SIZE) */
ibool* lru) /*!< in: pointer to a variable that will be assigned
TRUE if storage was allocated from the LRU list
and buf_pool_mutex was temporarily released,
or NULL if the LRU list should not be used */
__attribute__((malloc));
and buf_pool_mutex was temporarily released */
__attribute__((malloc, nonnull));
/**********************************************************************//**
Release a block. */
UNIV_INLINE
......
/*****************************************************************************
Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved.
Copyright (c) 2006, 2011, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
......@@ -36,8 +36,8 @@ Created December 2006 by Marko Makela
/**********************************************************************//**
Allocate a block. The thread calling this function must hold
buf_pool_mutex and must not hold buf_pool_zip_mutex or any block->mutex.
The buf_pool_mutex may only be released and reacquired if lru != NULL.
@return allocated block, possibly NULL if lru==NULL */
The buf_pool_mutex may be released and reacquired.
@return allocated block, never NULL */
UNIV_INTERN
void*
buf_buddy_alloc_low(
......@@ -46,9 +46,8 @@ buf_buddy_alloc_low(
or BUF_BUDDY_SIZES */
ibool* lru) /*!< in: pointer to a variable that will be assigned
TRUE if storage was allocated from the LRU list
and buf_pool_mutex was temporarily released,
or NULL if the LRU list should not be used */
__attribute__((malloc));
and buf_pool_mutex was temporarily released */
__attribute__((malloc, nonnull));
/**********************************************************************//**
Deallocate a block. */
......@@ -74,6 +73,8 @@ buf_buddy_get_slot(
ulint i;
ulint s;
ut_ad(size >= PAGE_ZIP_MIN_SIZE);
for (i = 0, s = BUF_BUDDY_LOW; s < size; i++, s <<= 1) {
}
......@@ -84,26 +85,25 @@ buf_buddy_get_slot(
/**********************************************************************//**
Allocate a block. The thread calling this function must hold
buf_pool_mutex and must not hold buf_pool_zip_mutex or any
block->mutex. The buf_pool_mutex may only be released and reacquired
if lru != NULL. This function should only be used for allocating
compressed page frames or control blocks (buf_page_t). Allocated
control blocks must be properly initialized immediately after
buf_buddy_alloc() has returned the memory, before releasing
buf_pool_mutex.
@return allocated block, possibly NULL if lru == NULL */
block->mutex. The buf_pool_mutex may be released and reacquired.
This function should only be used for allocating compressed page frames.
@return allocated block, never NULL */
UNIV_INLINE
void*
buf_buddy_alloc(
/*============*/
ulint size, /*!< in: block size, up to UNIV_PAGE_SIZE */
ulint size, /*!< in: compressed page size
(between PAGE_ZIP_MIN_SIZE and UNIV_PAGE_SIZE) */
ibool* lru) /*!< in: pointer to a variable that will be assigned
TRUE if storage was allocated from the LRU list
and buf_pool_mutex was temporarily released,
or NULL if the LRU list should not be used */
and buf_pool_mutex was temporarily released */
{
ut_ad(buf_pool_mutex_own());
ut_ad(ut_is_2pow(size));
ut_ad(size >= PAGE_ZIP_MIN_SIZE);
ut_ad(size <= UNIV_PAGE_SIZE);
return(buf_buddy_alloc_low(buf_buddy_get_slot(size), lru));
return((byte*) buf_buddy_alloc_low(buf_buddy_get_slot(size), lru));
}
/**********************************************************************//**
......@@ -117,6 +117,9 @@ buf_buddy_free(
ulint size) /*!< in: block size, up to UNIV_PAGE_SIZE */
{
ut_ad(buf_pool_mutex_own());
ut_ad(ut_is_2pow(size));
ut_ad(size >= PAGE_ZIP_MIN_SIZE);
ut_ad(size <= UNIV_PAGE_SIZE);
buf_buddy_free_low(buf, buf_buddy_get_slot(size));
}
......
......@@ -156,6 +156,23 @@ UNIV_INLINE
ib_uint64_t
buf_pool_get_oldest_modification(void);
/*==================================*/
/********************************************************************//**
Allocates a buf_page_t descriptor. This function must succeed. In case
of failure we assert in this function. */
UNIV_INLINE
buf_page_t*
buf_page_alloc_descriptor(void)
/*===========================*/
__attribute__((malloc));
/********************************************************************//**
Free a buf_page_t descriptor. */
UNIV_INLINE
void
buf_page_free_descriptor(
/*=====================*/
buf_page_t* bpage) /*!< in: bpage descriptor to free. */
__attribute__((nonnull));
/********************************************************************//**
Allocates a buffer block.
@return own: the allocated block, in state BUF_BLOCK_MEMORY */
......
......@@ -714,6 +714,35 @@ buf_block_get_lock_hash_val(
return(block->lock_hash_val);
}
/********************************************************************//**
Allocates a buf_page_t descriptor. This function must succeed. In case
of failure we assert in this function.
@return: the allocated descriptor. */
UNIV_INLINE
buf_page_t*
buf_page_alloc_descriptor(void)
/*===========================*/
{
buf_page_t* bpage;
bpage = (buf_page_t*) ut_malloc(sizeof *bpage);
ut_d(memset(bpage, 0, sizeof *bpage));
UNIV_MEM_ALLOC(bpage, sizeof *bpage);
return(bpage);
}
/********************************************************************//**
Free a buf_page_t descriptor. */
UNIV_INLINE
void
buf_page_free_descriptor(
/*=====================*/
buf_page_t* bpage) /*!< in: bpage descriptor to free. */
{
ut_free(bpage);
}
/********************************************************************//**
Allocates a buffer block.
@return own: the allocated block, in state BUF_BLOCK_MEMORY */
......
......@@ -30,18 +30,6 @@ Created 11/5/1995 Heikki Tuuri
#include "ut0byte.h"
#include "buf0types.h"
/** The return type of buf_LRU_free_block() */
enum buf_lru_free_block_status {
/** freed */
BUF_LRU_FREED = 0,
/** not freed because the caller asked to remove the
uncompressed frame but the control block cannot be
relocated */
BUF_LRU_CANNOT_RELOCATE,
/** not freed because of some other reason */
BUF_LRU_NOT_FREED
};
/******************************************************************//**
Tries to remove LRU flushed blocks from the end of the LRU list and put them
to the free list. This is beneficial for the efficiency of the insert buffer
......@@ -98,17 +86,16 @@ buf_LRU_insert_zip_clean(
Try to free a block. If bpage is a descriptor of a compressed-only
page, the descriptor object will be freed as well.
NOTE: If this function returns BUF_LRU_FREED, it will temporarily
NOTE: If this function returns TRUE, it will temporarily
release buf_pool_mutex. Furthermore, the page frame will no longer be
accessible via bpage.
The caller must hold buf_pool_mutex and buf_page_get_mutex(bpage) and
release these two mutexes after the call. No other
buf_page_get_mutex() may be held when calling this function.
@return BUF_LRU_FREED if freed, BUF_LRU_CANNOT_RELOCATE or
BUF_LRU_NOT_FREED otherwise. */
@return TRUE if freed, FALSE otherwise. */
UNIV_INTERN
enum buf_lru_free_block_status
ibool
buf_LRU_free_block(
/*===============*/
buf_page_t* bpage, /*!< in: block to be freed */
......
/*****************************************************************************
Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
......@@ -26,6 +26,8 @@ Created 11/17/1995 Heikki Tuuri
#ifndef buf0types_h
#define buf0types_h
#include "page0types.h"
/** Buffer page (uncompressed or compressed) */
typedef struct buf_page_struct buf_page_t;
/** Buffer block for which an uncompressed page exists */
......@@ -58,17 +60,10 @@ enum buf_io_fix {
/** Parameters of binary buddy system for compressed pages (buf0buddy.h) */
/* @{ */
#if UNIV_WORD_SIZE <= 4 /* 32-bit system */
/** Base-2 logarithm of the smallest buddy block size */
# define BUF_BUDDY_LOW_SHIFT 6
#else /* 64-bit system */
/** Base-2 logarithm of the smallest buddy block size */
# define BUF_BUDDY_LOW_SHIFT 7
#endif
#define BUF_BUDDY_LOW_SHIFT PAGE_ZIP_MIN_SIZE_SHIFT
#define BUF_BUDDY_LOW (1 << BUF_BUDDY_LOW_SHIFT)
/*!< minimum block size in the binary
buddy system; must be at least
sizeof(buf_page_t) */
#define BUF_BUDDY_SIZES (UNIV_PAGE_SIZE_SHIFT - BUF_BUDDY_LOW_SHIFT)
/*!< number of buddy sizes */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment