Commit e60e6505 authored by Inaam Rana

Bug 12635227 - 61188: DROP TABLE EXTREMELY SLOW

approved by: Marko
rb://681

Coalescing of free buf_page_t descriptors can prove to be a severe
bottleneck in the performance of compression. One workload where it
hurts badly is DROP TABLE. This patch removes buf_page_t allocations
from buf_buddy and uses ut_malloc instead.
In order to further reduce the overhead of coalescing, we no longer
attempt to coalesce a block if the corresponding free_list holds fewer
than 16 blocks.
parent 7de029da
2011-06-16 The InnoDB Team
* btr/btr0cur.c, buf/buf0buddy.c, buf/buf0buf.c, buf/buf0lru.c,
include/buf0buddy.h, include/buf0buddy.ic, include/buf0buf.h,
include/buf0buf.ic, include/buf0lru.h, include/buf0types.h:
Fix Bug#61188 DROP TABLE extremely slow
2011-06-16 The InnoDB Team 2011-06-16 The InnoDB Team
* buf/buf0buddy.c, buf/buf0buf.c, buf/buf0flu.c, buf/buf0lru.c, * buf/buf0buddy.c, buf/buf0buf.c, buf/buf0flu.c, buf/buf0lru.c,
......
...@@ -3864,7 +3864,7 @@ btr_blob_free( ...@@ -3864,7 +3864,7 @@ btr_blob_free(
&& buf_block_get_space(block) == space && buf_block_get_space(block) == space
&& buf_block_get_page_no(block) == page_no) { && buf_block_get_page_no(block) == page_no) {
if (buf_LRU_free_block(&block->page, all) != BUF_LRU_FREED if (!buf_LRU_free_block(&block->page, all)
&& all && block->page.zip.data) { && all && block->page.zip.data) {
/* Attempt to deallocate the uncompressed page /* Attempt to deallocate the uncompressed page
if the whole block cannot be deallocted. */ if the whole block cannot be deallocted. */
......
...@@ -45,6 +45,14 @@ static ulint buf_buddy_n_frames; ...@@ -45,6 +45,14 @@ static ulint buf_buddy_n_frames;
Protected by buf_pool_mutex. */ Protected by buf_pool_mutex. */
UNIV_INTERN buf_buddy_stat_t buf_buddy_stat[BUF_BUDDY_SIZES + 1]; UNIV_INTERN buf_buddy_stat_t buf_buddy_stat[BUF_BUDDY_SIZES + 1];
/** Validate a given zip_free list.
Hands buf_pool->zip_free[i] to UT_LIST_VALIDATE together with a debug
assertion that the visited node is in state BUF_BLOCK_ZIP_FREE.
@param i index of buf_pool->zip_free[] to validate
NOTE(review): ut_list_node_313 is presumably the identifier that
UT_LIST_VALIDATE gives to the node being checked; confirm against the
UT_LIST_VALIDATE definition before renaming or reusing it. */
#define BUF_BUDDY_LIST_VALIDATE(i) \
UT_LIST_VALIDATE(list, buf_page_t, \
buf_pool->zip_free[i], \
ut_ad(buf_page_get_state( \
ut_list_node_313) \
== BUF_BLOCK_ZIP_FREE))
/**********************************************************************//** /**********************************************************************//**
Get the offset of the buddy of a compressed page frame. Get the offset of the buddy of a compressed page frame.
@return the buddy relative of page */ @return the buddy relative of page */
...@@ -76,21 +84,10 @@ buf_buddy_add_to_free( ...@@ -76,21 +84,10 @@ buf_buddy_add_to_free(
buf_page_t* bpage, /*!< in,own: block to be freed */ buf_page_t* bpage, /*!< in,own: block to be freed */
ulint i) /*!< in: index of buf_pool->zip_free[] */ ulint i) /*!< in: index of buf_pool->zip_free[] */
{ {
#ifdef UNIV_DEBUG_VALGRIND
buf_page_t* b = UT_LIST_GET_FIRST(buf_pool->zip_free[i]);
if (b) UNIV_MEM_VALID(b, BUF_BUDDY_LOW << i);
#endif /* UNIV_DEBUG_VALGRIND */
ut_ad(buf_pool_mutex_own()); ut_ad(buf_pool_mutex_own());
ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE); ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
ut_ad(buf_pool->zip_free[i].start != bpage); ut_ad(buf_pool->zip_free[i].start != bpage);
UT_LIST_ADD_FIRST(list, buf_pool->zip_free[i], bpage); UT_LIST_ADD_FIRST(list, buf_pool->zip_free[i], bpage);
#ifdef UNIV_DEBUG_VALGRIND
if (b) UNIV_MEM_FREE(b, BUF_BUDDY_LOW << i);
UNIV_MEM_ASSERT_AND_FREE(bpage, BUF_BUDDY_LOW << i);
#endif /* UNIV_DEBUG_VALGRIND */
} }
/**********************************************************************//** /**********************************************************************//**
...@@ -102,25 +99,17 @@ buf_buddy_remove_from_free( ...@@ -102,25 +99,17 @@ buf_buddy_remove_from_free(
buf_page_t* bpage, /*!< in: block to be removed */ buf_page_t* bpage, /*!< in: block to be removed */
ulint i) /*!< in: index of buf_pool->zip_free[] */ ulint i) /*!< in: index of buf_pool->zip_free[] */
{ {
#ifdef UNIV_DEBUG_VALGRIND #ifdef UNIV_DEBUG
buf_page_t* prev = UT_LIST_GET_PREV(list, bpage); buf_page_t* prev = UT_LIST_GET_PREV(list, bpage);
buf_page_t* next = UT_LIST_GET_NEXT(list, bpage); buf_page_t* next = UT_LIST_GET_NEXT(list, bpage);
if (prev) UNIV_MEM_VALID(prev, BUF_BUDDY_LOW << i);
if (next) UNIV_MEM_VALID(next, BUF_BUDDY_LOW << i);
ut_ad(!prev || buf_page_get_state(prev) == BUF_BLOCK_ZIP_FREE); ut_ad(!prev || buf_page_get_state(prev) == BUF_BLOCK_ZIP_FREE);
ut_ad(!next || buf_page_get_state(next) == BUF_BLOCK_ZIP_FREE); ut_ad(!next || buf_page_get_state(next) == BUF_BLOCK_ZIP_FREE);
#endif /* UNIV_DEBUG_VALGRIND */ #endif /* UNIV_DEBUG */
ut_ad(buf_pool_mutex_own()); ut_ad(buf_pool_mutex_own());
ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE); ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
UT_LIST_REMOVE(list, buf_pool->zip_free[i], bpage); UT_LIST_REMOVE(list, buf_pool->zip_free[i], bpage);
#ifdef UNIV_DEBUG_VALGRIND
if (prev) UNIV_MEM_FREE(prev, BUF_BUDDY_LOW << i);
if (next) UNIV_MEM_FREE(next, BUF_BUDDY_LOW << i);
#endif /* UNIV_DEBUG_VALGRIND */
} }
/**********************************************************************//** /**********************************************************************//**
...@@ -136,17 +125,13 @@ buf_buddy_alloc_zip( ...@@ -136,17 +125,13 @@ buf_buddy_alloc_zip(
ut_ad(buf_pool_mutex_own()); ut_ad(buf_pool_mutex_own());
ut_a(i < BUF_BUDDY_SIZES); ut_a(i < BUF_BUDDY_SIZES);
ut_a(i >= buf_buddy_get_slot(PAGE_ZIP_MIN_SIZE));
ut_d(BUF_BUDDY_LIST_VALIDATE(i));
#ifndef UNIV_DEBUG_VALGRIND
/* Valgrind would complain about accessing free memory. */
ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
ut_ad(buf_page_get_state(ut_list_node_313)
== BUF_BLOCK_ZIP_FREE)));
#endif /* !UNIV_DEBUG_VALGRIND */
bpage = UT_LIST_GET_FIRST(buf_pool->zip_free[i]); bpage = UT_LIST_GET_FIRST(buf_pool->zip_free[i]);
if (bpage) { if (bpage) {
UNIV_MEM_VALID(bpage, BUF_BUDDY_LOW << i);
ut_a(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE); ut_a(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
buf_buddy_remove_from_free(bpage, i); buf_buddy_remove_from_free(bpage, i);
...@@ -165,13 +150,10 @@ buf_buddy_alloc_zip( ...@@ -165,13 +150,10 @@ buf_buddy_alloc_zip(
} }
} }
#ifdef UNIV_DEBUG
if (bpage) { if (bpage) {
memset(bpage, ~i, BUF_BUDDY_LOW << i); ut_d(memset(bpage, ~i, BUF_BUDDY_LOW << i));
UNIV_MEM_ALLOC(bpage, BUF_BUDDY_SIZES << i);
} }
#endif /* UNIV_DEBUG */
UNIV_MEM_ALLOC(bpage, BUF_BUDDY_SIZES << i);
return(bpage); return(bpage);
} }
...@@ -255,6 +237,7 @@ buf_buddy_alloc_from( ...@@ -255,6 +237,7 @@ buf_buddy_alloc_from(
{ {
ulint offs = BUF_BUDDY_LOW << j; ulint offs = BUF_BUDDY_LOW << j;
ut_ad(j <= BUF_BUDDY_SIZES); ut_ad(j <= BUF_BUDDY_SIZES);
ut_ad(i >= buf_buddy_get_slot(PAGE_ZIP_MIN_SIZE));
ut_ad(j >= i); ut_ad(j >= i);
ut_ad(!ut_align_offset(buf, offs)); ut_ad(!ut_align_offset(buf, offs));
...@@ -268,13 +251,7 @@ buf_buddy_alloc_from( ...@@ -268,13 +251,7 @@ buf_buddy_alloc_from(
bpage = (buf_page_t*) ((byte*) buf + offs); bpage = (buf_page_t*) ((byte*) buf + offs);
ut_d(memset(bpage, j, BUF_BUDDY_LOW << j)); ut_d(memset(bpage, j, BUF_BUDDY_LOW << j));
bpage->state = BUF_BLOCK_ZIP_FREE; bpage->state = BUF_BLOCK_ZIP_FREE;
#ifndef UNIV_DEBUG_VALGRIND ut_d(BUF_BUDDY_LIST_VALIDATE(i));
/* Valgrind would complain about accessing free memory. */
ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
ut_ad(buf_page_get_state(
ut_list_node_313)
== BUF_BLOCK_ZIP_FREE)));
#endif /* !UNIV_DEBUG_VALGRIND */
buf_buddy_add_to_free(bpage, j); buf_buddy_add_to_free(bpage, j);
} }
...@@ -284,8 +261,8 @@ buf_buddy_alloc_from( ...@@ -284,8 +261,8 @@ buf_buddy_alloc_from(
/**********************************************************************//** /**********************************************************************//**
Allocate a block. The thread calling this function must hold Allocate a block. The thread calling this function must hold
buf_pool_mutex and must not hold buf_pool_zip_mutex or any block->mutex. buf_pool_mutex and must not hold buf_pool_zip_mutex or any block->mutex.
The buf_pool_mutex may only be released and reacquired if lru != NULL. The buf_pool_mutex may be released and reacquired.
@return allocated block, possibly NULL if lru==NULL */ @return allocated block, never NULL */
UNIV_INTERN UNIV_INTERN
void* void*
buf_buddy_alloc_low( buf_buddy_alloc_low(
...@@ -294,13 +271,14 @@ buf_buddy_alloc_low( ...@@ -294,13 +271,14 @@ buf_buddy_alloc_low(
or BUF_BUDDY_SIZES */ or BUF_BUDDY_SIZES */
ibool* lru) /*!< in: pointer to a variable that will be assigned ibool* lru) /*!< in: pointer to a variable that will be assigned
TRUE if storage was allocated from the LRU list TRUE if storage was allocated from the LRU list
and buf_pool_mutex was temporarily released, and buf_pool_mutex was temporarily released */
or NULL if the LRU list should not be used */
{ {
buf_block_t* block; buf_block_t* block;
ut_ad(lru);
ut_ad(buf_pool_mutex_own()); ut_ad(buf_pool_mutex_own());
ut_ad(!mutex_own(&buf_pool_zip_mutex)); ut_ad(!mutex_own(&buf_pool_zip_mutex));
ut_ad(i >= buf_buddy_get_slot(PAGE_ZIP_MIN_SIZE));
if (i < BUF_BUDDY_SIZES) { if (i < BUF_BUDDY_SIZES) {
/* Try to allocate from the buddy system. */ /* Try to allocate from the buddy system. */
...@@ -320,11 +298,6 @@ buf_buddy_alloc_low( ...@@ -320,11 +298,6 @@ buf_buddy_alloc_low(
goto alloc_big; goto alloc_big;
} }
if (!lru) {
return(NULL);
}
/* Try replacing an uncompressed page in the buffer pool. */ /* Try replacing an uncompressed page in the buffer pool. */
buf_pool_mutex_exit(); buf_pool_mutex_exit();
block = buf_LRU_get_free_block(); block = buf_LRU_get_free_block();
...@@ -341,65 +314,6 @@ func_exit: ...@@ -341,65 +314,6 @@ func_exit:
return(block); return(block);
} }
/**********************************************************************//**
Try to relocate the control block of a compressed page.
@return TRUE if relocated */
static
ibool
buf_buddy_relocate_block(
/*=====================*/
buf_page_t* bpage, /*!< in: block to relocate */
buf_page_t* dpage) /*!< in: free block to relocate to */
{
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
buf_page_t* b;
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
ut_ad(buf_pool_mutex_own());
switch (buf_page_get_state(bpage)) {
case BUF_BLOCK_ZIP_FREE:
case BUF_BLOCK_NOT_USED:
case BUF_BLOCK_READY_FOR_USE:
case BUF_BLOCK_FILE_PAGE:
case BUF_BLOCK_MEMORY:
case BUF_BLOCK_REMOVE_HASH:
ut_error;
case BUF_BLOCK_ZIP_DIRTY:
/* Cannot relocate dirty pages. */
return(FALSE);
case BUF_BLOCK_ZIP_PAGE:
break;
}
mutex_enter(&buf_pool_zip_mutex);
if (!buf_page_can_relocate(bpage)) {
mutex_exit(&buf_pool_zip_mutex);
return(FALSE);
}
buf_relocate(bpage, dpage);
ut_d(bpage->state = BUF_BLOCK_ZIP_FREE);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/* relocate buf_pool->zip_clean */
b = UT_LIST_GET_PREV(list, dpage);
UT_LIST_REMOVE(list, buf_pool->zip_clean, dpage);
if (b) {
UT_LIST_INSERT_AFTER(list, buf_pool->zip_clean, b, dpage);
} else {
UT_LIST_ADD_FIRST(list, buf_pool->zip_clean, dpage);
}
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
UNIV_MEM_INVALID(bpage, sizeof *bpage);
mutex_exit(&buf_pool_zip_mutex);
return(TRUE);
}
/**********************************************************************//** /**********************************************************************//**
Try to relocate a block. Try to relocate a block.
@return TRUE if relocated */ @return TRUE if relocated */
...@@ -414,106 +328,89 @@ buf_buddy_relocate( ...@@ -414,106 +328,89 @@ buf_buddy_relocate(
buf_page_t* bpage; buf_page_t* bpage;
const ulint size = BUF_BUDDY_LOW << i; const ulint size = BUF_BUDDY_LOW << i;
ullint usec = ut_time_us(NULL); ullint usec = ut_time_us(NULL);
mutex_t* mutex;
ulint space;
ulint page_no;
ut_ad(buf_pool_mutex_own()); ut_ad(buf_pool_mutex_own());
ut_ad(!mutex_own(&buf_pool_zip_mutex)); ut_ad(!mutex_own(&buf_pool_zip_mutex));
ut_ad(!ut_align_offset(src, size)); ut_ad(!ut_align_offset(src, size));
ut_ad(!ut_align_offset(dst, size)); ut_ad(!ut_align_offset(dst, size));
ut_ad(i >= buf_buddy_get_slot(PAGE_ZIP_MIN_SIZE));
UNIV_MEM_ASSERT_W(dst, size); UNIV_MEM_ASSERT_W(dst, size);
/* We assume that all memory from buf_buddy_alloc() /* We assume that all memory from buf_buddy_alloc()
is used for either compressed pages or buf_page_t is used for compressed page frames. */
objects covering compressed pages. */
/* We look inside the allocated objects returned by /* We look inside the allocated objects returned by
buf_buddy_alloc() and assume that anything of buf_buddy_alloc() and assume that each block is a compressed
PAGE_ZIP_MIN_SIZE or larger is a compressed page that contains page that contains a valid space_id and page_no in the page
a valid space_id and page_no in the page header. Should the header. Should the fields be invalid, we will be unable to
fields be invalid, we will be unable to relocate the block. relocate the block. */
We also assume that anything that fits sizeof(buf_page_t)
actually is a properly initialized buf_page_t object. */ /* The src block may be split into smaller blocks,
some of which may be free. Thus, the
if (size >= PAGE_ZIP_MIN_SIZE) { mach_read_from_4() calls below may attempt to read
/* This is a compressed page. */ from free memory. The memory is "owned" by the buddy
mutex_t* mutex; allocator (and it has been allocated from the buffer
pool), so there is nothing wrong about this. The
/* The src block may be split into smaller blocks, mach_read_from_4() calls here will only trigger bogus
some of which may be free. Thus, the Valgrind memcheck warnings in UNIV_DEBUG_VALGRIND builds. */
mach_read_from_4() calls below may attempt to read space = mach_read_from_4((const byte *) src
from free memory. The memory is "owned" by the buddy + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
allocator (and it has been allocated from the buffer page_no = mach_read_from_4((const byte *) src
pool), so there is nothing wrong about this. The + FIL_PAGE_OFFSET);
mach_read_from_4() calls here will only trigger bogus /* Suppress Valgrind warnings about conditional jump
Valgrind memcheck warnings in UNIV_DEBUG_VALGRIND builds. */ on uninitialized value. */
ulint space = mach_read_from_4( UNIV_MEM_VALID(&space, sizeof space);
(const byte*) src + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID); UNIV_MEM_VALID(&page_no, sizeof page_no);
ulint page_no = mach_read_from_4( bpage = buf_page_hash_get(space, page_no);
(const byte*) src + FIL_PAGE_OFFSET);
/* Suppress Valgrind warnings about conditional jump if (!bpage || bpage->zip.data != src) {
on uninitialized value. */ /* The block has probably been freshly
UNIV_MEM_VALID(&space, sizeof space); allocated by buf_LRU_get_free_block() but not
UNIV_MEM_VALID(&page_no, sizeof page_no); added to buf_pool->page_hash yet. Obviously,
bpage = buf_page_hash_get(space, page_no); it cannot be relocated. */
if (!bpage || bpage->zip.data != src) {
/* The block has probably been freshly
allocated by buf_LRU_get_free_block() but not
added to buf_pool->page_hash yet. Obviously,
it cannot be relocated. */
return(FALSE);
}
if (page_zip_get_size(&bpage->zip) != size) { return(FALSE);
/* The block is of different size. We would }
have to relocate all blocks covered by src.
For the sake of simplicity, give up. */
ut_ad(page_zip_get_size(&bpage->zip) < size);
return(FALSE); if (page_zip_get_size(&bpage->zip) != size) {
} /* The block is of different size. We would
have to relocate all blocks covered by src.
For the sake of simplicity, give up. */
ut_ad(page_zip_get_size(&bpage->zip) < size);
/* The block must have been allocated, but it may return(FALSE);
contain uninitialized data. */ }
UNIV_MEM_ASSERT_W(src, size);
mutex = buf_page_get_mutex(bpage);
mutex_enter(mutex);
if (buf_page_can_relocate(bpage)) {
/* Relocate the compressed page. */
ut_a(bpage->zip.data == src);
memcpy(dst, src, size);
bpage->zip.data = dst;
mutex_exit(mutex);
success:
UNIV_MEM_INVALID(src, size);
{
buf_buddy_stat_t* buddy_stat
= &buf_buddy_stat[i];
buddy_stat->relocated++;
buddy_stat->relocated_usec
+= ut_time_us(NULL) - usec;
}
return(TRUE);
}
mutex_exit(mutex); /* The block must have been allocated, but it may
} else if (i == buf_buddy_get_slot(sizeof(buf_page_t))) { contain uninitialized data. */
/* This must be a buf_page_t object. */ UNIV_MEM_ASSERT_W(src, size);
#if UNIV_WORD_SIZE == 4
/* On 32-bit systems, there is no padding in mutex = buf_page_get_mutex(bpage);
buf_page_t. On other systems, Valgrind could complain
about uninitialized pad bytes. */ mutex_enter(mutex);
UNIV_MEM_ASSERT_RW(src, size);
#endif
if (buf_buddy_relocate_block(src, dst)) {
goto success; if (buf_page_can_relocate(bpage)) {
/* Relocate the compressed page. */
ut_a(bpage->zip.data == src);
memcpy(dst, src, size);
bpage->zip.data = dst;
mutex_exit(mutex);
UNIV_MEM_INVALID(src, size);
{
buf_buddy_stat_t* buddy_stat
= &buf_buddy_stat[i];
buddy_stat->relocated++;
buddy_stat->relocated_usec
+= ut_time_us(NULL) - usec;
} }
return(TRUE);
} }
mutex_exit(mutex);
return(FALSE); return(FALSE);
} }
...@@ -534,12 +431,14 @@ buf_buddy_free_low( ...@@ -534,12 +431,14 @@ buf_buddy_free_low(
ut_ad(buf_pool_mutex_own()); ut_ad(buf_pool_mutex_own());
ut_ad(!mutex_own(&buf_pool_zip_mutex)); ut_ad(!mutex_own(&buf_pool_zip_mutex));
ut_ad(i <= BUF_BUDDY_SIZES); ut_ad(i <= BUF_BUDDY_SIZES);
ut_ad(i >= buf_buddy_get_slot(PAGE_ZIP_MIN_SIZE));
ut_ad(buf_buddy_stat[i].used > 0); ut_ad(buf_buddy_stat[i].used > 0);
buf_buddy_stat[i].used--; buf_buddy_stat[i].used--;
recombine: recombine:
UNIV_MEM_ASSERT_AND_ALLOC(buf, BUF_BUDDY_LOW << i); UNIV_MEM_ASSERT_AND_ALLOC(buf, BUF_BUDDY_LOW << i);
ut_d(((buf_page_t*) buf)->state = BUF_BLOCK_ZIP_FREE); ((buf_page_t*) buf)->state = BUF_BLOCK_ZIP_FREE;
if (i == BUF_BUDDY_SIZES) { if (i == BUF_BUDDY_SIZES) {
buf_buddy_block_free(buf); buf_buddy_block_free(buf);
...@@ -550,32 +449,36 @@ recombine: ...@@ -550,32 +449,36 @@ recombine:
ut_ad(buf == ut_align_down(buf, BUF_BUDDY_LOW << i)); ut_ad(buf == ut_align_down(buf, BUF_BUDDY_LOW << i));
ut_ad(!buf_pool_contains_zip(buf)); ut_ad(!buf_pool_contains_zip(buf));
/* Try to combine adjacent blocks. */ /* Do not recombine blocks if there are few free blocks.
We may waste up to 15360*max_len bytes to free blocks
(1024 + 2048 + 4096 + 8192 = 15360) */
if (UT_LIST_GET_LEN(buf_pool->zip_free[i]) < 16) {
goto func_exit;
}
/* Try to combine adjacent blocks. */
buddy = (buf_page_t*) buf_buddy_get(((byte*) buf), BUF_BUDDY_LOW << i); buddy = (buf_page_t*) buf_buddy_get(((byte*) buf), BUF_BUDDY_LOW << i);
#ifndef UNIV_DEBUG_VALGRIND #ifndef UNIV_DEBUG_VALGRIND
/* Valgrind would complain about accessing free memory. */ /* When Valgrind instrumentation is not enabled, we can read
buddy->state to quickly determine that a block is not free.
When the block is not free, buddy->state belongs to a compressed
page frame that may be flagged uninitialized in our Valgrind
instrumentation. */
if (buddy->state != BUF_BLOCK_ZIP_FREE) { if (buddy->state != BUF_BLOCK_ZIP_FREE) {
goto buddy_nonfree; goto buddy_nonfree;
} }
/* The field buddy->state can only be trusted for free blocks.
If buddy->state == BUF_BLOCK_ZIP_FREE, the block is free if
it is in the free list. */
#endif /* !UNIV_DEBUG_VALGRIND */ #endif /* !UNIV_DEBUG_VALGRIND */
for (bpage = UT_LIST_GET_FIRST(buf_pool->zip_free[i]); bpage; ) { for (bpage = UT_LIST_GET_FIRST(buf_pool->zip_free[i]); bpage; ) {
UNIV_MEM_VALID(bpage, BUF_BUDDY_LOW << i);
ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE); ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
if (bpage == buddy) { if (bpage == buddy) {
buddy_free:
/* The buddy is free: recombine */ /* The buddy is free: recombine */
buf_buddy_remove_from_free(bpage, i); buf_buddy_remove_from_free(bpage, i);
buddy_free2: buddy_is_free:
ut_ad(buf_page_get_state(buddy) == BUF_BLOCK_ZIP_FREE); ut_ad(buf_page_get_state(buddy) == BUF_BLOCK_ZIP_FREE);
ut_ad(!buf_pool_contains_zip(buddy)); ut_ad(!buf_pool_contains_zip(buddy));
i++; i++;
...@@ -585,122 +488,43 @@ buddy_free2: ...@@ -585,122 +488,43 @@ buddy_free2:
} }
ut_a(bpage != buf); ut_a(bpage != buf);
UNIV_MEM_ASSERT_W(bpage, BUF_BUDDY_LOW << i);
{ bpage = UT_LIST_GET_NEXT(list, bpage);
buf_page_t* next = UT_LIST_GET_NEXT(list, bpage);
UNIV_MEM_ASSERT_AND_FREE(bpage, BUF_BUDDY_LOW << i);
bpage = next;
}
} }
#ifndef UNIV_DEBUG_VALGRIND #ifndef UNIV_DEBUG_VALGRIND
buddy_nonfree: buddy_nonfree:
/* Valgrind would complain about accessing free memory. */ #endif /* !UNIV_DEBUG_VALGRIND */
ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
ut_ad(buf_page_get_state(ut_list_node_313) ut_d(BUF_BUDDY_LIST_VALIDATE(i));
== BUF_BLOCK_ZIP_FREE)));
#endif /* UNIV_DEBUG_VALGRIND */
/* The buddy is not free. Is there a free block of this size? */ /* The buddy is not free. Is there a free block of this size? */
bpage = UT_LIST_GET_FIRST(buf_pool->zip_free[i]); bpage = UT_LIST_GET_FIRST(buf_pool->zip_free[i]);
if (bpage) { if (bpage) {
/* Remove the block from the free list, because a successful /* Remove the block from the free list, because a successful
buf_buddy_relocate() will overwrite bpage->list. */ buf_buddy_relocate() will overwrite bpage->list. */
UNIV_MEM_VALID(bpage, BUF_BUDDY_LOW << i);
buf_buddy_remove_from_free(bpage, i); buf_buddy_remove_from_free(bpage, i);
/* Try to relocate the buddy of buf to the free block. */ /* Try to relocate the buddy of buf to the free block. */
if (buf_buddy_relocate(buddy, bpage, i)) { if (buf_buddy_relocate(buddy, bpage, i)) {
ut_d(buddy->state = BUF_BLOCK_ZIP_FREE); buddy->state = BUF_BLOCK_ZIP_FREE;
goto buddy_free2; goto buddy_is_free;
} }
buf_buddy_add_to_free(bpage, i); buf_buddy_add_to_free(bpage, i);
/* Try to relocate the buddy of the free block to buf. */
buddy = (buf_page_t*) buf_buddy_get(((byte*) bpage),
BUF_BUDDY_LOW << i);
#ifndef UNIV_DEBUG_VALGRIND
/* Valgrind would complain about accessing free memory. */
/* The buddy must not be (completely) free, because we
always recombine adjacent free blocks.
(Parts of the buddy can be free in
buf_pool->zip_free[j] with j < i.) */
ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
ut_ad(buf_page_get_state(
ut_list_node_313)
== BUF_BLOCK_ZIP_FREE
&& ut_list_node_313 != buddy)));
#endif /* !UNIV_DEBUG_VALGRIND */
if (buf_buddy_relocate(buddy, buf, i)) {
buf = bpage;
UNIV_MEM_VALID(bpage, BUF_BUDDY_LOW << i);
ut_d(buddy->state = BUF_BLOCK_ZIP_FREE);
goto buddy_free;
}
} }
func_exit:
/* Free the block to the buddy list. */ /* Free the block to the buddy list. */
bpage = buf; bpage = buf;
#ifdef UNIV_DEBUG
if (i < buf_buddy_get_slot(PAGE_ZIP_MIN_SIZE)) { /* Fill large blocks with a constant pattern. */
/* This area has most likely been allocated for at ut_d(memset(bpage, i, BUF_BUDDY_LOW << i));
least one compressed-only block descriptor. Check UNIV_MEM_INVALID(bpage, BUF_BUDDY_LOW << i);
that there are no live objects in the area. This is
not a complete check: it may yield false positives as
well as false negatives. Also, due to buddy blocks
being recombined, it is possible (although unlikely)
that this branch is never reached. */
char* c;
# ifndef UNIV_DEBUG_VALGRIND
/* Valgrind would complain about accessing
uninitialized memory. Besides, Valgrind performs a
more exhaustive check, at every memory access. */
const buf_page_t* b = buf;
const buf_page_t* const b_end = (buf_page_t*)
((char*) b + (BUF_BUDDY_LOW << i));
for (; b < b_end; b++) {
/* Avoid false positives (and cause false
negatives) by checking for b->space < 1000. */
if ((b->state == BUF_BLOCK_ZIP_PAGE
|| b->state == BUF_BLOCK_ZIP_DIRTY)
&& b->space > 0 && b->space < 1000) {
fprintf(stderr,
"buddy dirty %p %u (%u,%u) %p,%lu\n",
(void*) b,
b->state, b->space, b->offset,
buf, i);
}
}
# endif /* !UNIV_DEBUG_VALGRIND */
/* Scramble the block. This should make any pointers
invalid and trigger a segmentation violation. Because
the scrambling can be reversed, it may be possible to
track down the object pointing to the freed data by
dereferencing the unscrambled bpage->LRU or
bpage->list pointers. */
for (c = (char*) buf + (BUF_BUDDY_LOW << i);
c-- > (char*) buf; ) {
*c = ~*c ^ i;
}
} else {
/* Fill large blocks with a constant pattern. */
memset(bpage, i, BUF_BUDDY_LOW << i);
}
#endif /* UNIV_DEBUG */
bpage->state = BUF_BLOCK_ZIP_FREE; bpage->state = BUF_BLOCK_ZIP_FREE;
buf_buddy_add_to_free(bpage, i); buf_buddy_add_to_free(bpage, i);
} }
...@@ -1358,7 +1358,7 @@ err_exit: ...@@ -1358,7 +1358,7 @@ err_exit:
mutex_enter(block_mutex); mutex_enter(block_mutex);
/* Discard the uncompressed page frame if possible. */ /* Discard the uncompressed page frame if possible. */
if (buf_LRU_free_block(bpage, FALSE) == BUF_LRU_FREED) { if (buf_LRU_free_block(bpage, FALSE)) {
mutex_exit(block_mutex); mutex_exit(block_mutex);
goto lookup; goto lookup;
...@@ -1699,13 +1699,8 @@ loop: ...@@ -1699,13 +1699,8 @@ loop:
if (block) { if (block) {
/* If the guess is a compressed page descriptor that /* If the guess is a compressed page descriptor that
has been allocated by buf_buddy_alloc(), it may have has been allocated by buf_page_alloc_descriptor(),
been invalidated by buf_buddy_relocate(). In that it may have been freed by buf_relocate(). */
case, block could point to something that happens to
contain the expected bits in block->page. Similarly,
the guess may be pointing to a buffer pool chunk that
has been released when resizing the buffer pool. */
if (!buf_block_is_uncompressed(block) if (!buf_block_is_uncompressed(block)
|| offset != block->page.offset || offset != block->page.offset
|| space != block->page.space || space != block->page.space
...@@ -1889,11 +1884,10 @@ wait_until_unfixed: ...@@ -1889,11 +1884,10 @@ wait_until_unfixed:
mutex_exit(&buf_pool_zip_mutex); mutex_exit(&buf_pool_zip_mutex);
buf_pool->n_pend_unzip++; buf_pool->n_pend_unzip++;
bpage->state = BUF_BLOCK_ZIP_FREE;
buf_buddy_free(bpage, sizeof *bpage);
buf_pool_mutex_exit(); buf_pool_mutex_exit();
buf_page_free_descriptor(bpage);
/* Decompress the page and apply buffered operations /* Decompress the page and apply buffered operations
while not holding buf_pool_mutex or block->mutex. */ while not holding buf_pool_mutex or block->mutex. */
success = buf_zip_decompress(block, srv_use_checksums); success = buf_zip_decompress(block, srv_use_checksums);
...@@ -1937,7 +1931,7 @@ wait_until_unfixed: ...@@ -1937,7 +1931,7 @@ wait_until_unfixed:
/* Try to evict the block from the buffer pool, to use the /* Try to evict the block from the buffer pool, to use the
insert buffer as much as possible. */ insert buffer as much as possible. */
if (buf_LRU_free_block(&block->page, TRUE) == BUF_LRU_FREED) { if (buf_LRU_free_block(&block->page, TRUE)) {
buf_pool_mutex_exit(); buf_pool_mutex_exit();
mutex_exit(&block->mutex); mutex_exit(&block->mutex);
fprintf(stderr, fprintf(stderr,
...@@ -2551,17 +2545,12 @@ err_exit: ...@@ -2551,17 +2545,12 @@ err_exit:
mutex_exit(&block->mutex); mutex_exit(&block->mutex);
} else { } else {
/* Defer buf_buddy_alloc() until after the block has
been found not to exist. The buf_buddy_alloc() and
buf_buddy_free() calls may be expensive because of
buf_buddy_relocate(). */
/* The compressed page must be allocated before the /* The compressed page must be allocated before the
control block (bpage), in order to avoid the control block (bpage), in order to avoid the
invocation of buf_buddy_relocate_block() on invocation of buf_buddy_relocate_block() on
uninitialized data. */ uninitialized data. */
data = buf_buddy_alloc(zip_size, &lru); data = buf_buddy_alloc(zip_size, &lru);
bpage = buf_buddy_alloc(sizeof *bpage, &lru);
/* If buf_buddy_alloc() allocated storage from the LRU list, /* If buf_buddy_alloc() allocated storage from the LRU list,
it released and reacquired buf_pool_mutex. Thus, we must it released and reacquired buf_pool_mutex. Thus, we must
...@@ -2569,15 +2558,13 @@ err_exit: ...@@ -2569,15 +2558,13 @@ err_exit:
if (UNIV_UNLIKELY(lru) if (UNIV_UNLIKELY(lru)
&& UNIV_LIKELY_NULL(buf_page_hash_get(space, offset))) { && UNIV_LIKELY_NULL(buf_page_hash_get(space, offset))) {
/* The block was added by some other thread. */
bpage->state = BUF_BLOCK_ZIP_FREE;
buf_buddy_free(bpage, sizeof *bpage);
buf_buddy_free(data, zip_size); buf_buddy_free(data, zip_size);
bpage = NULL; bpage = NULL;
goto func_exit; goto func_exit;
} }
bpage = buf_page_alloc_descriptor();
page_zip_des_init(&bpage->zip); page_zip_des_init(&bpage->zip);
page_zip_set_size(&bpage->zip, zip_size); page_zip_set_size(&bpage->zip, zip_size);
bpage->zip.data = data; bpage->zip.data = data;
......
...@@ -355,7 +355,7 @@ scan_again: ...@@ -355,7 +355,7 @@ scan_again:
while (bpage != NULL) { while (bpage != NULL) {
buf_page_t* prev_bpage; buf_page_t* prev_bpage;
ibool prev_bpage_buf_fix = FALSE; mutex_t* block_mutex = NULL;
ut_a(buf_page_in_file(bpage)); ut_a(buf_page_in_file(bpage));
...@@ -368,18 +368,21 @@ scan_again: ...@@ -368,18 +368,21 @@ scan_again:
if (buf_page_get_space(bpage) != id) { if (buf_page_get_space(bpage) != id) {
/* Skip this block, as it does not belong to /* Skip this block, as it does not belong to
the space that is being invalidated. */ the space that is being invalidated. */
goto next_page;
} else if (buf_page_get_io_fix(bpage) != BUF_IO_NONE) { } else if (buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
/* We cannot remove this page during this scan /* We cannot remove this page during this scan
yet; maybe the system is currently reading it yet; maybe the system is currently reading it
in, or flushing the modifications to the file */ in, or flushing the modifications to the file */
all_freed = FALSE; all_freed = FALSE;
goto next_page;
} else { } else {
mutex_t* block_mutex = buf_page_get_mutex(bpage); block_mutex = buf_page_get_mutex(bpage);
mutex_enter(block_mutex); mutex_enter(block_mutex);
if (bpage->buf_fix_count > 0) { if (bpage->buf_fix_count > 0) {
mutex_exit(block_mutex);
/* We cannot remove this page during /* We cannot remove this page during
this scan yet; maybe the system is this scan yet; maybe the system is
currently reading it in, or flushing currently reading it in, or flushing
...@@ -389,106 +392,59 @@ scan_again: ...@@ -389,106 +392,59 @@ scan_again:
goto next_page; goto next_page;
} }
}
ut_ad(mutex_own(block_mutex));
#ifdef UNIV_DEBUG #ifdef UNIV_DEBUG
if (buf_debug_prints) { if (buf_debug_prints) {
fprintf(stderr, fprintf(stderr,
"Dropping space %lu page %lu\n", "Dropping space %lu page %lu\n",
(ulong) buf_page_get_space(bpage), (ulong) buf_page_get_space(bpage),
(ulong) buf_page_get_page_no(bpage)); (ulong) buf_page_get_page_no(bpage));
} }
#endif #endif
if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) { if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) {
/* This is a compressed-only block /* This is a compressed-only block
descriptor. Ensure that prev_bpage descriptor. Do nothing. */
cannot be relocated when bpage is freed. */ } else if (((buf_block_t*) bpage)->is_hashed) {
if (UNIV_LIKELY(prev_bpage != NULL)) { ulint page_no;
switch (buf_page_get_state( ulint zip_size;
prev_bpage)) {
case BUF_BLOCK_FILE_PAGE:
/* Descriptors of uncompressed
blocks will not be relocated,
because we are holding the
buf_pool_mutex. */
break;
case BUF_BLOCK_ZIP_PAGE:
case BUF_BLOCK_ZIP_DIRTY:
/* Descriptors of compressed-
only blocks can be relocated,
unless they are buffer-fixed.
Because both bpage and
prev_bpage are protected by
buf_pool_zip_mutex, it is
not necessary to acquire
further mutexes. */
ut_ad(&buf_pool_zip_mutex
== block_mutex);
ut_ad(mutex_own(block_mutex));
prev_bpage_buf_fix = TRUE;
prev_bpage->buf_fix_count++;
break;
default:
ut_error;
}
}
} else if (((buf_block_t*) bpage)->is_hashed) {
ulint page_no;
ulint zip_size;
buf_pool_mutex_exit();
zip_size = buf_page_get_zip_size(bpage); buf_pool_mutex_exit();
page_no = buf_page_get_page_no(bpage);
mutex_exit(block_mutex); zip_size = buf_page_get_zip_size(bpage);
page_no = buf_page_get_page_no(bpage);
/* Note that the following call will acquire mutex_exit(block_mutex);
an S-latch on the page */
btr_search_drop_page_hash_when_freed( /* Note that the following call will acquire
id, zip_size, page_no); an S-latch on the page */
goto scan_again;
}
if (bpage->oldest_modification != 0) { btr_search_drop_page_hash_when_freed(
id, zip_size, page_no);
goto scan_again;
}
buf_flush_remove(bpage); if (bpage->oldest_modification != 0) {
}
/* Remove from the LRU list. */ buf_flush_remove(bpage);
}
if (buf_LRU_block_remove_hashed_page(bpage, TRUE) /* Remove from the LRU list. */
!= BUF_BLOCK_ZIP_FREE) {
buf_LRU_block_free_hashed_page((buf_block_t*)
bpage);
} else {
/* The block_mutex should have been
released by buf_LRU_block_remove_hashed_page()
when it returns BUF_BLOCK_ZIP_FREE. */
ut_ad(block_mutex == &buf_pool_zip_mutex);
ut_ad(!mutex_own(block_mutex));
if (prev_bpage_buf_fix) {
/* We temporarily buffer-fixed
prev_bpage, so that
buf_buddy_free() could not
relocate it, in case it was a
compressed-only block
descriptor. */
mutex_enter(block_mutex);
ut_ad(prev_bpage->buf_fix_count > 0);
prev_bpage->buf_fix_count--;
mutex_exit(block_mutex);
}
goto next_page_no_mutex; if (buf_LRU_block_remove_hashed_page(bpage, TRUE)
} != BUF_BLOCK_ZIP_FREE) {
next_page: buf_LRU_block_free_hashed_page((buf_block_t*) bpage);
mutex_exit(block_mutex); mutex_exit(block_mutex);
} else {
/* The block_mutex should have been released
by buf_LRU_block_remove_hashed_page() when it
returns BUF_BLOCK_ZIP_FREE. */
ut_ad(block_mutex == &buf_pool_zip_mutex);
ut_ad(!mutex_own(block_mutex));
} }
next_page:
next_page_no_mutex:
bpage = prev_bpage; bpage = prev_bpage;
} }
...@@ -574,7 +530,7 @@ buf_LRU_free_from_unzip_LRU_list( ...@@ -574,7 +530,7 @@ buf_LRU_free_from_unzip_LRU_list(
UNIV_LIKELY(block != NULL) && UNIV_LIKELY(distance > 0); UNIV_LIKELY(block != NULL) && UNIV_LIKELY(distance > 0);
block = UT_LIST_GET_PREV(unzip_LRU, block), distance--) { block = UT_LIST_GET_PREV(unzip_LRU, block), distance--) {
enum buf_lru_free_block_status freed; ibool freed;
ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
ut_ad(block->in_unzip_LRU_list); ut_ad(block->in_unzip_LRU_list);
...@@ -584,24 +540,9 @@ buf_LRU_free_from_unzip_LRU_list( ...@@ -584,24 +540,9 @@ buf_LRU_free_from_unzip_LRU_list(
freed = buf_LRU_free_block(&block->page, FALSE); freed = buf_LRU_free_block(&block->page, FALSE);
mutex_exit(&block->mutex); mutex_exit(&block->mutex);
switch (freed) { if (freed) {
case BUF_LRU_FREED:
return(TRUE); return(TRUE);
case BUF_LRU_CANNOT_RELOCATE:
/* If we failed to relocate, try
regular LRU eviction. */
return(FALSE);
case BUF_LRU_NOT_FREED:
/* The block was buffer-fixed or I/O-fixed.
Keep looking. */
continue;
} }
/* inappropriate return value from
buf_LRU_free_block() */
ut_error;
} }
return(FALSE); return(FALSE);
...@@ -632,10 +573,9 @@ buf_LRU_free_from_common_LRU_list( ...@@ -632,10 +573,9 @@ buf_LRU_free_from_common_LRU_list(
UNIV_LIKELY(bpage != NULL) && UNIV_LIKELY(distance > 0); UNIV_LIKELY(bpage != NULL) && UNIV_LIKELY(distance > 0);
bpage = UT_LIST_GET_PREV(LRU, bpage), distance--) { bpage = UT_LIST_GET_PREV(LRU, bpage), distance--) {
enum buf_lru_free_block_status freed; ibool freed;
unsigned accessed; unsigned accessed;
mutex_t* block_mutex mutex_t* block_mutex = buf_page_get_mutex(bpage);
= buf_page_get_mutex(bpage);
ut_ad(buf_page_in_file(bpage)); ut_ad(buf_page_in_file(bpage));
ut_ad(bpage->in_LRU_list); ut_ad(bpage->in_LRU_list);
...@@ -645,8 +585,7 @@ buf_LRU_free_from_common_LRU_list( ...@@ -645,8 +585,7 @@ buf_LRU_free_from_common_LRU_list(
freed = buf_LRU_free_block(bpage, TRUE); freed = buf_LRU_free_block(bpage, TRUE);
mutex_exit(block_mutex); mutex_exit(block_mutex);
switch (freed) { if (freed) {
case BUF_LRU_FREED:
/* Keep track of pages that are evicted without /* Keep track of pages that are evicted without
ever being accessed. This gives us a measure of ever being accessed. This gives us a measure of
the effectiveness of readahead */ the effectiveness of readahead */
...@@ -654,21 +593,7 @@ buf_LRU_free_from_common_LRU_list( ...@@ -654,21 +593,7 @@ buf_LRU_free_from_common_LRU_list(
++buf_pool->stat.n_ra_pages_evicted; ++buf_pool->stat.n_ra_pages_evicted;
} }
return(TRUE); return(TRUE);
case BUF_LRU_NOT_FREED:
/* The block was dirty, buffer-fixed, or I/O-fixed.
Keep looking. */
continue;
case BUF_LRU_CANNOT_RELOCATE:
/* This should never occur, because we
want to discard the compressed page too. */
break;
} }
/* inappropriate return value from
buf_LRU_free_block() */
ut_error;
} }
return(FALSE); return(FALSE);
...@@ -1350,17 +1275,16 @@ buf_LRU_make_block_old( ...@@ -1350,17 +1275,16 @@ buf_LRU_make_block_old(
Try to free a block. If bpage is a descriptor of a compressed-only Try to free a block. If bpage is a descriptor of a compressed-only
page, the descriptor object will be freed as well. page, the descriptor object will be freed as well.
NOTE: If this function returns BUF_LRU_FREED, it will temporarily NOTE: If this function returns TRUE, it will temporarily
release buf_pool_mutex. Furthermore, the page frame will no longer be release buf_pool_mutex. Furthermore, the page frame will no longer be
accessible via bpage. accessible via bpage.
The caller must hold buf_pool_mutex and buf_page_get_mutex(bpage) and The caller must hold buf_pool_mutex and buf_page_get_mutex(bpage) and
release these two mutexes after the call. No other release these two mutexes after the call. No other
buf_page_get_mutex() may be held when calling this function. buf_page_get_mutex() may be held when calling this function.
@return BUF_LRU_FREED if freed, BUF_LRU_CANNOT_RELOCATE or @return TRUE if freed, FALSE otherwise. */
BUF_LRU_NOT_FREED otherwise. */
UNIV_INTERN UNIV_INTERN
enum buf_lru_free_block_status ibool
buf_LRU_free_block( buf_LRU_free_block(
/*===============*/ /*===============*/
buf_page_t* bpage, /*!< in: block to be freed */ buf_page_t* bpage, /*!< in: block to be freed */
...@@ -1385,7 +1309,7 @@ buf_LRU_free_block( ...@@ -1385,7 +1309,7 @@ buf_LRU_free_block(
if (!buf_page_can_relocate(bpage)) { if (!buf_page_can_relocate(bpage)) {
/* Do not free buffer-fixed or I/O-fixed blocks. */ /* Do not free buffer-fixed or I/O-fixed blocks. */
return(BUF_LRU_NOT_FREED); return(FALSE);
} }
#ifdef UNIV_IBUF_COUNT_DEBUG #ifdef UNIV_IBUF_COUNT_DEBUG
...@@ -1397,7 +1321,7 @@ buf_LRU_free_block( ...@@ -1397,7 +1321,7 @@ buf_LRU_free_block(
/* Do not completely free dirty blocks. */ /* Do not completely free dirty blocks. */
if (bpage->oldest_modification) { if (bpage->oldest_modification) {
return(BUF_LRU_NOT_FREED); return(FALSE);
} }
} else if (bpage->oldest_modification) { } else if (bpage->oldest_modification) {
/* Do not completely free dirty blocks. */ /* Do not completely free dirty blocks. */
...@@ -1405,7 +1329,7 @@ buf_LRU_free_block( ...@@ -1405,7 +1329,7 @@ buf_LRU_free_block(
if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) { if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) {
ut_ad(buf_page_get_state(bpage) ut_ad(buf_page_get_state(bpage)
== BUF_BLOCK_ZIP_DIRTY); == BUF_BLOCK_ZIP_DIRTY);
return(BUF_LRU_NOT_FREED); return(FALSE);
} }
goto alloc; goto alloc;
...@@ -1414,14 +1338,8 @@ buf_LRU_free_block( ...@@ -1414,14 +1338,8 @@ buf_LRU_free_block(
If it cannot be allocated (without freeing a block If it cannot be allocated (without freeing a block
from the LRU list), refuse to free bpage. */ from the LRU list), refuse to free bpage. */
alloc: alloc:
buf_pool_mutex_exit_forbid(); b = buf_page_alloc_descriptor();
b = buf_buddy_alloc(sizeof *b, NULL); ut_a(b);
buf_pool_mutex_exit_allow();
if (UNIV_UNLIKELY(!b)) {
return(BUF_LRU_CANNOT_RELOCATE);
}
memcpy(b, bpage, sizeof *b); memcpy(b, bpage, sizeof *b);
} }
...@@ -1589,7 +1507,7 @@ alloc: ...@@ -1589,7 +1507,7 @@ alloc:
mutex_enter(block_mutex); mutex_enter(block_mutex);
} }
return(BUF_LRU_FREED); return(TRUE);
} }
/******************************************************************//** /******************************************************************//**
...@@ -1809,10 +1727,8 @@ buf_LRU_block_remove_hashed_page( ...@@ -1809,10 +1727,8 @@ buf_LRU_block_remove_hashed_page(
buf_pool_mutex_exit_forbid(); buf_pool_mutex_exit_forbid();
buf_buddy_free(bpage->zip.data, buf_buddy_free(bpage->zip.data,
page_zip_get_size(&bpage->zip)); page_zip_get_size(&bpage->zip));
bpage->state = BUF_BLOCK_ZIP_FREE;
buf_buddy_free(bpage, sizeof(*bpage));
buf_pool_mutex_exit_allow(); buf_pool_mutex_exit_allow();
UNIV_MEM_UNDESC(bpage); buf_page_free_descriptor(bpage);
return(BUF_BLOCK_ZIP_FREE); return(BUF_BLOCK_ZIP_FREE);
case BUF_BLOCK_FILE_PAGE: case BUF_BLOCK_FILE_PAGE:
......
/***************************************************************************** /*****************************************************************************
Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved. Copyright (c) 2006, 2011, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software the terms of the GNU General Public License as published by the Free Software
...@@ -37,24 +37,19 @@ Created December 2006 by Marko Makela ...@@ -37,24 +37,19 @@ Created December 2006 by Marko Makela
/**********************************************************************//** /**********************************************************************//**
Allocate a block. The thread calling this function must hold Allocate a block. The thread calling this function must hold
buf_pool_mutex and must not hold buf_pool_zip_mutex or any buf_pool_mutex and must not hold buf_pool_zip_mutex or any
block->mutex. The buf_pool_mutex may only be released and reacquired block->mutex. The buf_pool_mutex may be released and reacquired.
if lru != NULL. This function should only be used for allocating This function should only be used for allocating compressed page frames.
compressed page frames or control blocks (buf_page_t). Allocated @return allocated block, never NULL */
control blocks must be properly initialized immediately after
buf_buddy_alloc() has returned the memory, before releasing
buf_pool_mutex.
@return allocated block, possibly NULL if lru == NULL */
UNIV_INLINE UNIV_INLINE
void* void*
buf_buddy_alloc( buf_buddy_alloc(
/*============*/ /*============*/
ulint size, /*!< in: block size, up to UNIV_PAGE_SIZE */ ulint size, /*!< in: compressed page size
(between PAGE_ZIP_MIN_SIZE and UNIV_PAGE_SIZE) */
ibool* lru) /*!< in: pointer to a variable that will be assigned ibool* lru) /*!< in: pointer to a variable that will be assigned
TRUE if storage was allocated from the LRU list TRUE if storage was allocated from the LRU list
and buf_pool_mutex was temporarily released, and buf_pool_mutex was temporarily released */
or NULL if the LRU list should not be used */ __attribute__((malloc, nonnull));
__attribute__((malloc));
/**********************************************************************//** /**********************************************************************//**
Release a block. */ Release a block. */
UNIV_INLINE UNIV_INLINE
......
/***************************************************************************** /*****************************************************************************
Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved. Copyright (c) 2006, 2011, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software the terms of the GNU General Public License as published by the Free Software
...@@ -36,8 +36,8 @@ Created December 2006 by Marko Makela ...@@ -36,8 +36,8 @@ Created December 2006 by Marko Makela
/**********************************************************************//** /**********************************************************************//**
Allocate a block. The thread calling this function must hold Allocate a block. The thread calling this function must hold
buf_pool_mutex and must not hold buf_pool_zip_mutex or any block->mutex. buf_pool_mutex and must not hold buf_pool_zip_mutex or any block->mutex.
The buf_pool_mutex may only be released and reacquired if lru != NULL. The buf_pool_mutex may be released and reacquired.
@return allocated block, possibly NULL if lru==NULL */ @return allocated block, never NULL */
UNIV_INTERN UNIV_INTERN
void* void*
buf_buddy_alloc_low( buf_buddy_alloc_low(
...@@ -46,9 +46,8 @@ buf_buddy_alloc_low( ...@@ -46,9 +46,8 @@ buf_buddy_alloc_low(
or BUF_BUDDY_SIZES */ or BUF_BUDDY_SIZES */
ibool* lru) /*!< in: pointer to a variable that will be assigned ibool* lru) /*!< in: pointer to a variable that will be assigned
TRUE if storage was allocated from the LRU list TRUE if storage was allocated from the LRU list
and buf_pool_mutex was temporarily released, and buf_pool_mutex was temporarily released */
or NULL if the LRU list should not be used */ __attribute__((malloc, nonnull));
__attribute__((malloc));
/**********************************************************************//** /**********************************************************************//**
Deallocate a block. */ Deallocate a block. */
...@@ -74,6 +73,8 @@ buf_buddy_get_slot( ...@@ -74,6 +73,8 @@ buf_buddy_get_slot(
ulint i; ulint i;
ulint s; ulint s;
ut_ad(size >= PAGE_ZIP_MIN_SIZE);
for (i = 0, s = BUF_BUDDY_LOW; s < size; i++, s <<= 1) { for (i = 0, s = BUF_BUDDY_LOW; s < size; i++, s <<= 1) {
} }
...@@ -84,26 +85,25 @@ buf_buddy_get_slot( ...@@ -84,26 +85,25 @@ buf_buddy_get_slot(
/**********************************************************************//** /**********************************************************************//**
Allocate a block. The thread calling this function must hold Allocate a block. The thread calling this function must hold
buf_pool_mutex and must not hold buf_pool_zip_mutex or any buf_pool_mutex and must not hold buf_pool_zip_mutex or any
block->mutex. The buf_pool_mutex may only be released and reacquired block->mutex. The buf_pool_mutex may be released and reacquired.
if lru != NULL. This function should only be used for allocating This function should only be used for allocating compressed page frames.
compressed page frames or control blocks (buf_page_t). Allocated @return allocated block, never NULL */
control blocks must be properly initialized immediately after
buf_buddy_alloc() has returned the memory, before releasing
buf_pool_mutex.
@return allocated block, possibly NULL if lru == NULL */
UNIV_INLINE UNIV_INLINE
void* void*
buf_buddy_alloc( buf_buddy_alloc(
/*============*/ /*============*/
ulint size, /*!< in: block size, up to UNIV_PAGE_SIZE */ ulint size, /*!< in: compressed page size
(between PAGE_ZIP_MIN_SIZE and UNIV_PAGE_SIZE) */
ibool* lru) /*!< in: pointer to a variable that will be assigned ibool* lru) /*!< in: pointer to a variable that will be assigned
TRUE if storage was allocated from the LRU list TRUE if storage was allocated from the LRU list
and buf_pool_mutex was temporarily released, and buf_pool_mutex was temporarily released */
or NULL if the LRU list should not be used */
{ {
ut_ad(buf_pool_mutex_own()); ut_ad(buf_pool_mutex_own());
ut_ad(ut_is_2pow(size));
ut_ad(size >= PAGE_ZIP_MIN_SIZE);
ut_ad(size <= UNIV_PAGE_SIZE);
return(buf_buddy_alloc_low(buf_buddy_get_slot(size), lru)); return((byte*) buf_buddy_alloc_low(buf_buddy_get_slot(size), lru));
} }
/**********************************************************************//** /**********************************************************************//**
...@@ -117,6 +117,9 @@ buf_buddy_free( ...@@ -117,6 +117,9 @@ buf_buddy_free(
ulint size) /*!< in: block size, up to UNIV_PAGE_SIZE */ ulint size) /*!< in: block size, up to UNIV_PAGE_SIZE */
{ {
ut_ad(buf_pool_mutex_own()); ut_ad(buf_pool_mutex_own());
ut_ad(ut_is_2pow(size));
ut_ad(size >= PAGE_ZIP_MIN_SIZE);
ut_ad(size <= UNIV_PAGE_SIZE);
buf_buddy_free_low(buf, buf_buddy_get_slot(size)); buf_buddy_free_low(buf, buf_buddy_get_slot(size));
} }
......
...@@ -156,6 +156,23 @@ UNIV_INLINE ...@@ -156,6 +156,23 @@ UNIV_INLINE
ib_uint64_t ib_uint64_t
buf_pool_get_oldest_modification(void); buf_pool_get_oldest_modification(void);
/*==================================*/ /*==================================*/
/********************************************************************//**
Allocates a buf_page_t descriptor. This function must succeed. In case
of failure we assert in this function.
NOTE(review): the inline definition contains no explicit assertion of
its own; the guarantee presumably comes from ut_malloc() -- confirm.
@return the allocated descriptor */
UNIV_INLINE
buf_page_t*
buf_page_alloc_descriptor(void)
/*===========================*/
__attribute__((malloc));
/********************************************************************//**
Free a buf_page_t descriptor. The argument must not be NULL, as the
nonnull attribute on this declaration states. */
UNIV_INLINE
void
buf_page_free_descriptor(
/*=====================*/
buf_page_t* bpage) /*!< in: bpage descriptor to free. */
__attribute__((nonnull));
/********************************************************************//** /********************************************************************//**
Allocates a buffer block. Allocates a buffer block.
@return own: the allocated block, in state BUF_BLOCK_MEMORY */ @return own: the allocated block, in state BUF_BLOCK_MEMORY */
......
...@@ -714,6 +714,35 @@ buf_block_get_lock_hash_val( ...@@ -714,6 +714,35 @@ buf_block_get_lock_hash_val(
return(block->lock_hash_val); return(block->lock_hash_val);
} }
/********************************************************************//**
Obtains storage for one buf_page_t descriptor by calling ut_malloc().
The caller is promised a usable descriptor: this function is required
to succeed.
NOTE(review): no explicit check of the ut_malloc() result is made here;
the "must succeed" guarantee presumably relies on ut_malloc() itself
asserting on failure -- confirm.
@return the allocated descriptor */
UNIV_INLINE
buf_page_t*
buf_page_alloc_descriptor(void)
/*===========================*/
{
	buf_page_t*	desc = (buf_page_t*) ut_malloc(sizeof(buf_page_t));

	/* Zero-fill wrapped in ut_d(); presumably compiled in debug
	builds only -- confirm against the ut_d() definition. */
	ut_d(memset(desc, 0, sizeof(buf_page_t)));

	/* Hand the whole descriptor to the UNIV_MEM_ALLOC()
	memory-checking hook. */
	UNIV_MEM_ALLOC(desc, sizeof(buf_page_t));

	return(desc);
}
/********************************************************************//**
Free a buf_page_t descriptor by passing it to ut_free(). Nothing else
is done here: no field of the descriptor is read or modified before
the call. */
UNIV_INLINE
void
buf_page_free_descriptor(
/*=====================*/
buf_page_t* bpage) /*!< in: bpage descriptor to free. */
{
ut_free(bpage);
}
/********************************************************************//** /********************************************************************//**
Allocates a buffer block. Allocates a buffer block.
@return own: the allocated block, in state BUF_BLOCK_MEMORY */ @return own: the allocated block, in state BUF_BLOCK_MEMORY */
......
...@@ -30,18 +30,6 @@ Created 11/5/1995 Heikki Tuuri ...@@ -30,18 +30,6 @@ Created 11/5/1995 Heikki Tuuri
#include "ut0byte.h" #include "ut0byte.h"
#include "buf0types.h" #include "buf0types.h"
/** The return type of buf_LRU_free_block() */
enum buf_lru_free_block_status {
/** freed */
BUF_LRU_FREED = 0,
/** not freed because the caller asked to remove the
uncompressed frame but the control block cannot be
relocated */
BUF_LRU_CANNOT_RELOCATE,
/** not freed because of some other reason */
BUF_LRU_NOT_FREED
};
/******************************************************************//** /******************************************************************//**
Tries to remove LRU flushed blocks from the end of the LRU list and put them Tries to remove LRU flushed blocks from the end of the LRU list and put them
to the free list. This is beneficial for the efficiency of the insert buffer to the free list. This is beneficial for the efficiency of the insert buffer
...@@ -98,17 +86,16 @@ buf_LRU_insert_zip_clean( ...@@ -98,17 +86,16 @@ buf_LRU_insert_zip_clean(
Try to free a block. If bpage is a descriptor of a compressed-only Try to free a block. If bpage is a descriptor of a compressed-only
page, the descriptor object will be freed as well. page, the descriptor object will be freed as well.
NOTE: If this function returns BUF_LRU_FREED, it will temporarily NOTE: If this function returns TRUE, it will temporarily
release buf_pool_mutex. Furthermore, the page frame will no longer be release buf_pool_mutex. Furthermore, the page frame will no longer be
accessible via bpage. accessible via bpage.
The caller must hold buf_pool_mutex and buf_page_get_mutex(bpage) and The caller must hold buf_pool_mutex and buf_page_get_mutex(bpage) and
release these two mutexes after the call. No other release these two mutexes after the call. No other
buf_page_get_mutex() may be held when calling this function. buf_page_get_mutex() may be held when calling this function.
@return BUF_LRU_FREED if freed, BUF_LRU_CANNOT_RELOCATE or @return TRUE if freed, FALSE otherwise. */
BUF_LRU_NOT_FREED otherwise. */
UNIV_INTERN UNIV_INTERN
enum buf_lru_free_block_status ibool
buf_LRU_free_block( buf_LRU_free_block(
/*===============*/ /*===============*/
buf_page_t* bpage, /*!< in: block to be freed */ buf_page_t* bpage, /*!< in: block to be freed */
......
/***************************************************************************** /*****************************************************************************
Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved
This program is free software; you can redistribute it and/or modify it under This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software the terms of the GNU General Public License as published by the Free Software
...@@ -26,6 +26,8 @@ Created 11/17/1995 Heikki Tuuri ...@@ -26,6 +26,8 @@ Created 11/17/1995 Heikki Tuuri
#ifndef buf0types_h #ifndef buf0types_h
#define buf0types_h #define buf0types_h
#include "page0types.h"
/** Buffer page (uncompressed or compressed) */ /** Buffer page (uncompressed or compressed) */
typedef struct buf_page_struct buf_page_t; typedef struct buf_page_struct buf_page_t;
/** Buffer block for which an uncompressed page exists */ /** Buffer block for which an uncompressed page exists */
...@@ -58,17 +60,10 @@ enum buf_io_fix { ...@@ -58,17 +60,10 @@ enum buf_io_fix {
/** Parameters of binary buddy system for compressed pages (buf0buddy.h) */ /** Parameters of binary buddy system for compressed pages (buf0buddy.h) */
/* @{ */ /* @{ */
#if UNIV_WORD_SIZE <= 4 /* 32-bit system */ #define BUF_BUDDY_LOW_SHIFT PAGE_ZIP_MIN_SIZE_SHIFT
/** Base-2 logarithm of the smallest buddy block size */
# define BUF_BUDDY_LOW_SHIFT 6
#else /* 64-bit system */
/** Base-2 logarithm of the smallest buddy block size */
# define BUF_BUDDY_LOW_SHIFT 7
#endif
#define BUF_BUDDY_LOW (1 << BUF_BUDDY_LOW_SHIFT) #define BUF_BUDDY_LOW (1 << BUF_BUDDY_LOW_SHIFT)
/*!< minimum block size in the binary
buddy system; must be at least
sizeof(buf_page_t) */
#define BUF_BUDDY_SIZES (UNIV_PAGE_SIZE_SHIFT - BUF_BUDDY_LOW_SHIFT) #define BUF_BUDDY_SIZES (UNIV_PAGE_SIZE_SHIFT - BUF_BUDDY_LOW_SHIFT)
/*!< number of buddy sizes */ /*!< number of buddy sizes */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment