Commit f890ec15 authored by marko

branches/zip: Use the buddy allocator for managing compressed pages.

There is something wrong with the management of compressed-only blocks
(BUF_BLOCK_ZIP_PAGE).  To disable the creation of such blocks, set zip=TRUE
in buf_LRU_block_remove_hashed_page().

buf_LRU_block_remove_hashed_page(): Release buf_pool->zip_mutex when
freeing a compressed-only page and its control block, with buf_buddy_free().
Adapt callers.

buf_LRU_block_free_hashed_page(): Change the parameter type from buf_page_t*
to buf_block_t*.

buf_LRU_free_block(): Move below the definition of buf_LRU_add_block_low().
Allocate block descriptor for compressed-only blocks.

buf_LRU_block_free_non_file_page(): Replace ut_free() with buf_buddy_free().

buf_zip_decompress(): New function, split from buf_page_io_complete().

buf_page_init_for_read(): Do not allocate the compressed page until it is
really needed, to avoid the overhead of the buddy allocator.  Decompress
compressed-only blocks that are needed again.

buf_page_create(): Do not allocate the compressed page until it is
really needed, to avoid the overhead of the buddy allocator.

buf_validate(): Replace some equality tests on the lengths of the LRU
list and the flush lists with greater-or-equal tests, since the counted
numbers do not include control blocks for compressed-only pages.
parent 0b89bb79
......@@ -724,7 +724,7 @@ buf_chunk_contains_zip(
#ifdef UNIV_SYNC_DEBUG
ut_a(mutex_own(&buf_pool->mutex));
#endif /* UNIV_SYNC_DEBUG */
block = chunk->blocks;
for (i = chunk->size; i--; block++) {
......@@ -1974,6 +1974,53 @@ buf_page_init(
#endif /* UNIV_DEBUG_FILE_ACCESSES */
}
/************************************************************************
Decompress a block: fill block->frame from the compressed page image
held in block->page.zip.data.

Pages of type FIL_PAGE_INDEX are passed to page_zip_decompress().  The
other recognized page types are copied verbatim, for
buf_block_get_zip_size(block) bytes.  Any other page type is rejected.
On failure a diagnostic is written to stderr. */
static
ibool
buf_zip_decompress(
/*===============*/
				/* out: TRUE if successful */
	buf_block_t*	block)	/* in/out: block */
{
	const byte*	frame	= block->page.zip.data;

	/* The block must have a compressed page size, and it must not
	belong to space 0. */
	ut_ad(buf_block_get_zip_size(block));
	ut_a(buf_block_get_space(block) != 0);

	switch (fil_page_get_type(frame)) {
	case FIL_PAGE_INDEX:
		if (page_zip_decompress(&block->page.zip,
					block->frame)) {
			return(TRUE);
		}

		fprintf(stderr,
			"InnoDB: unable to decompress space %lu page %lu\n",
			(ulong) block->page.space,
			(ulong) block->page.offset);
		return(FALSE);

	case FIL_PAGE_TYPE_ALLOCATED:
	case FIL_PAGE_INODE:
	case FIL_PAGE_IBUF_BITMAP:
	case FIL_PAGE_TYPE_FSP_HDR:
	case FIL_PAGE_TYPE_XDES:
	case FIL_PAGE_TYPE_ZBLOB:
		/* Copy to uncompressed storage. */
		memcpy(block->frame, frame,
		       buf_block_get_zip_size(block));
		return(TRUE);
	}

	/* Unrecognized page type.  The value is cast to ulong so that the
	argument matches the %lu conversion, as the other message in this
	function already does; passing a differently sized integer to
	%lu is undefined behavior. */
	ut_print_timestamp(stderr);
	fprintf(stderr,
		"InnoDB: unknown compressed page"
		" type %lu\n",
		(ulong) fil_page_get_type(frame));
	return(FALSE);
}
/************************************************************************
Function which inits a page for read to the buffer buf_pool. If the page is
(1) already in buf_pool, or
......@@ -2025,7 +2072,7 @@ buf_page_init_for_read(
ut_ad(mode == BUF_READ_ANY_PAGE);
}
block = buf_LRU_get_free_block(zip_size);
block = buf_LRU_get_free_block(0);
ut_a(block);
......@@ -2058,8 +2105,6 @@ buf_page_init_for_read(
/* Move the compressed page from bpage to block,
and uncompress it. */
buf_buddy_free(block->page.zip.data, zip_size);
mutex_enter(&buf_pool->zip_mutex);
memcpy(&block->page, bpage, sizeof *bpage);
block->page.state = BUF_BLOCK_FILE_PAGE;
......@@ -2093,8 +2138,18 @@ buf_page_init_for_read(
buf_buddy_free(bpage, sizeof *bpage);
mutex_exit(&block->mutex);
mutex_exit(&buf_pool->zip_mutex);
break;
mutex_exit(&buf_pool->mutex);
if (mode == BUF_READ_IBUF_PAGES_ONLY) {
mtr_commit(&mtr);
}
buf_zip_decompress(block);
return(NULL);
case BUF_BLOCK_FILE_PAGE:
break;
case BUF_BLOCK_ZIP_FREE:
......@@ -2126,6 +2181,11 @@ buf_page_init_for_read(
ut_ad(block);
if (zip_size) {
page_zip_set_size(&block->page.zip, zip_size);
block->page.zip.data = buf_buddy_alloc(zip_size, TRUE);
}
buf_page_init(space, offset, block);
/* The block must be put to the LRU list, to the old blocks */
......@@ -2179,7 +2239,7 @@ buf_page_create(
ut_ad(mtr);
ut_ad(space || !zip_size);
free_block = buf_LRU_get_free_block(zip_size);
free_block = buf_LRU_get_free_block(0);
mutex_enter(&(buf_pool->mutex));
......@@ -2214,6 +2274,11 @@ buf_page_create(
block = free_block;
if (zip_size) {
page_zip_set_size(&block->page.zip, zip_size);
block->page.zip.data = buf_buddy_alloc(zip_size, TRUE);
}
mutex_enter(&block->mutex);
buf_page_init(space, offset, block);
......@@ -2293,36 +2358,10 @@ buf_page_io_complete(
byte* frame;
if (buf_block_get_zip_size(block)) {
ut_a(buf_block_get_space(block) != 0);
frame = block->page.zip.data;
switch (fil_page_get_type(frame)) {
case FIL_PAGE_INDEX:
if (block->frame) {
if (!page_zip_decompress(
&block->page.zip,
block->frame)) {
goto corrupt;
}
}
break;
case FIL_PAGE_TYPE_ALLOCATED:
case FIL_PAGE_INODE:
case FIL_PAGE_IBUF_BITMAP:
case FIL_PAGE_TYPE_FSP_HDR:
case FIL_PAGE_TYPE_XDES:
case FIL_PAGE_TYPE_ZBLOB:
/* Copy to uncompressed storage. */
memcpy(block->frame, frame,
buf_block_get_zip_size(block));
break;
default:
ut_print_timestamp(stderr);
fprintf(stderr,
"InnoDB: unknown compressed page"
" type %lu\n",
fil_page_get_type(frame));
if (!buf_zip_decompress(block)) {
goto corrupt;
}
} else {
......@@ -2636,7 +2675,7 @@ buf_validate(void)
ut_error;
}
ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == n_lru);
ut_a(UT_LIST_GET_LEN(buf_pool->LRU) >= n_lru);
if (UT_LIST_GET_LEN(buf_pool->free) != n_free) {
fprintf(stderr, "Free list len %lu, free blocks %lu\n",
(ulong) UT_LIST_GET_LEN(buf_pool->free),
......@@ -2645,9 +2684,9 @@ buf_validate(void)
}
ut_a(UT_LIST_GET_LEN(buf_pool->flush_list) == n_flush);
ut_a(buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE] == n_single_flush);
ut_a(buf_pool->n_flush[BUF_FLUSH_LIST] == n_list_flush);
ut_a(buf_pool->n_flush[BUF_FLUSH_LRU] == n_lru_flush);
ut_a(buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE] >= n_single_flush);
ut_a(buf_pool->n_flush[BUF_FLUSH_LIST] >= n_list_flush);
ut_a(buf_pool->n_flush[BUF_FLUSH_LRU] >= n_lru_flush);
mutex_exit(&(buf_pool->mutex));
......
......@@ -49,7 +49,9 @@ frames in the buffer pool, we set this to TRUE */
ibool buf_lru_switched_on_innodb_mon = FALSE;
/**********************************************************************
Takes a block out of the LRU list and page hash table. */
Takes a block out of the LRU list and page hash table.
If the block is compressed-only (BUF_BLOCK_ZIP_PAGE),
the object will be freed and buf_pool->zip_mutex will be released. */
static
enum buf_page_state
buf_LRU_block_remove_hashed_page(
......@@ -69,7 +71,7 @@ static
void
buf_LRU_block_free_hashed_page(
/*===========================*/
buf_page_t* block); /* in: block, must contain a file page and
buf_block_t* block); /* in: block, must contain a file page and
be in a state where it can be freed */
/**********************************************************************
......@@ -146,11 +148,15 @@ buf_LRU_invalidate_tablespace(
/* Remove from the LRU list */
if (buf_LRU_block_remove_hashed_page(bpage, TRUE)
!= BUF_BLOCK_ZIP_FREE) {
buf_LRU_block_free_hashed_page(bpage);
buf_LRU_block_free_hashed_page((buf_block_t*)
bpage);
} else {
goto next_page2;
}
}
next_page:
mutex_exit(block_mutex);
next_page2:
bpage = prev_bpage;
}
......@@ -232,59 +238,6 @@ buf_LRU_insert_zip_clean(
}
}
/**********************************************************************
Try to free a block.  The block is freed only if
buf_flush_ready_for_replace() accepts it.  Unless removing the block from
the LRU list and page hash already disposed of it (BUF_BLOCK_ZIP_FREE),
the buffer pool mutex and the block mutex are released while the
adaptive hash index is dropped and are re-acquired before the block is
freed. */
ibool
buf_LRU_free_block(
/*===============*/
				/* out: TRUE if freed */
	buf_page_t*	bpage,	/* in: block to be freed */
	ibool		zip)	/* in: TRUE if should remove also the
				compressed page of an uncompressed page */
{
	mutex_t*	page_mutex;

	page_mutex = buf_page_get_mutex(bpage);

#ifdef UNIV_SYNC_DEBUG
	ut_ad(mutex_own(&buf_pool->mutex));
	ut_ad(mutex_own(page_mutex));
#endif /* UNIV_SYNC_DEBUG */
	ut_ad(buf_page_in_file(bpage));
	ut_ad(bpage->in_LRU_list);

	if (!buf_flush_ready_for_replace(bpage)) {

		/* The block cannot be replaced right now. */
		return(FALSE);
	}

#ifdef UNIV_DEBUG
	if (buf_debug_prints) {
		fprintf(stderr, "Putting space %lu page %lu to free list\n",
			(ulong) buf_page_get_space(bpage),
			(ulong) buf_page_get_page_no(bpage));
	}
#endif /* UNIV_DEBUG */

	if (buf_LRU_block_remove_hashed_page(bpage, zip)
	    == BUF_BLOCK_ZIP_FREE) {

		/* Nothing is left to free. */
		return(TRUE);
	}

	/* Drop both mutexes around the adaptive hash index cleanup. */
	mutex_exit(&buf_pool->mutex);
	mutex_exit(page_mutex);

	/* Remove possible adaptive hash index on the page */
	btr_search_drop_page_hash_index((buf_block_t*) bpage);
	ut_a(bpage->buf_fix_count == 0);

	/* Take the mutexes again, in the order in which they were
	released, before handing the block back. */
	mutex_enter(&buf_pool->mutex);
	mutex_enter(page_mutex);

	buf_LRU_block_free_hashed_page(bpage);

	return(TRUE);
}
/**********************************************************************
Look for a replaceable block from the end of the LRU list and put it to
the free list if found. */
......@@ -531,8 +484,7 @@ buf_LRU_get_free_block(
if (zip_size) {
page_zip_set_size(&block->page.zip, zip_size);
/* TODO: allocate zip from an aligned pool */
block->page.zip.data = ut_malloc(zip_size);
block->page.zip.data = buf_buddy_alloc(zip_size, TRUE);
} else {
page_zip_set_size(&block->page.zip, 0);
block->page.zip.data = NULL;
......@@ -919,6 +871,87 @@ buf_LRU_make_block_old(
buf_LRU_add_block_to_end_low(bpage);
}
/**********************************************************************
Try to free a block.  The block is freed only if
buf_flush_ready_for_replace() accepts it.

If the block still carries a compressed page after it has been removed
from the LRU list and page hash, a separate block descriptor is
allocated so that the compressed page stays in the buffer pool as a
compressed-only block (BUF_BLOCK_ZIP_PAGE).  If that allocation fails,
the compressed page remains attached to the block that is being freed. */
ibool
buf_LRU_free_block(
/*===============*/
				/* out: TRUE if freed */
	buf_page_t*	bpage,	/* in: block to be freed */
	ibool		zip)	/* in: TRUE if should remove also the
				compressed page of an uncompressed page */
{
	mutex_t*	page_mutex	= buf_page_get_mutex(bpage);
	buf_page_t*	zip_desc	= NULL;

#ifdef UNIV_SYNC_DEBUG
	ut_ad(mutex_own(&buf_pool->mutex));
	ut_ad(mutex_own(page_mutex));
#endif /* UNIV_SYNC_DEBUG */
	ut_ad(buf_page_in_file(bpage));
	ut_ad(bpage->in_LRU_list);

	if (!buf_flush_ready_for_replace(bpage)) {

		/* The block cannot be replaced right now. */
		return(FALSE);
	}

#ifdef UNIV_DEBUG
	if (buf_debug_prints) {
		fprintf(stderr, "Putting space %lu page %lu to free list\n",
			(ulong) buf_page_get_space(bpage),
			(ulong) buf_page_get_page_no(bpage));
	}
#endif /* UNIV_DEBUG */

	if (buf_LRU_block_remove_hashed_page(bpage, zip)
	    == BUF_BLOCK_ZIP_FREE) {

		/* NOTE(review): this state is returned for a
		compressed-only block, which the callee has already freed
		after releasing buf_pool->zip_mutex, so bpage must not be
		touched any more.  The mutex is taken again here,
		presumably so that the caller can release it as usual --
		confirm against the callers. */
		mutex_enter(page_mutex);

		return(TRUE);
	}

	/* Drop both mutexes around the adaptive hash index cleanup. */
	mutex_exit(&buf_pool->mutex);
	mutex_exit(page_mutex);

	/* Remove possible adaptive hash index on the page */
	btr_search_drop_page_hash_index((buf_block_t*) bpage);
	ut_a(bpage->buf_fix_count == 0);

	mutex_enter(&buf_pool->mutex);
	mutex_enter(page_mutex);

	if (bpage->zip.data) {
		/* Keep the compressed page.
		Allocate a block descriptor for it. */
		zip_desc = buf_buddy_alloc(sizeof *zip_desc, FALSE);
	}

	if (zip_desc) {
		ulint	addr_fold;

		/* Clone the descriptor and register the clone as a
		compressed-only page in the page hash, the LRU list and
		the list of clean compressed pages. */
		memcpy(zip_desc, bpage, sizeof *zip_desc);
		zip_desc->state = BUF_BLOCK_ZIP_PAGE;

		addr_fold = buf_page_address_fold(zip_desc->space,
						  zip_desc->offset);

		HASH_INSERT(buf_page_t, hash,
			    buf_pool->page_hash, addr_fold, zip_desc);

		buf_LRU_add_block_low(zip_desc, TRUE);
		buf_LRU_insert_zip_clean(zip_desc);

		/* The compressed page now belongs to the clone; detach
		it from the block that is about to be freed. */
		bpage->zip.data = NULL;
		page_zip_set_size(&bpage->zip, 0);
	}

	buf_LRU_block_free_hashed_page((buf_block_t*) bpage);

	return(TRUE);
}
/**********************************************************************
Puts a block back to the free list. */
......@@ -927,6 +960,7 @@ buf_LRU_block_free_non_file_page(
/*=============================*/
buf_block_t* block) /* in: block, must not contain a file page */
{
void* data;
#ifdef UNIV_SYNC_DEBUG
ut_ad(mutex_own(&(buf_pool->mutex)));
ut_ad(mutex_own(&block->mutex));
......@@ -954,10 +988,11 @@ buf_LRU_block_free_non_file_page(
memset(block->frame + FIL_PAGE_OFFSET, 0xfe, 4);
memset(block->frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, 0xfe, 4);
#endif
if (block->page.zip.data) {
/* TODO: return zip to an aligned pool */
ut_free(block->page.zip.data);
data = block->page.zip.data;
if (data) {
block->page.zip.data = NULL;
buf_buddy_free(data, page_zip_get_size(&block->page.zip));
page_zip_set_size(&block->page.zip, 0);
}
......@@ -968,7 +1003,9 @@ buf_LRU_block_free_non_file_page(
}
/**********************************************************************
Takes a block out of the LRU list and page hash table. */
Takes a block out of the LRU list and page hash table.
If the block is compressed-only (BUF_BLOCK_ZIP_PAGE),
the object will be freed and buf_pool->zip_mutex will be released. */
static
enum buf_page_state
buf_LRU_block_remove_hashed_page(
......@@ -1053,7 +1090,15 @@ buf_LRU_block_remove_hashed_page(
memset(bpage->zip.data + FIL_PAGE_OFFSET, 0xff, 4);
memset(bpage->zip.data + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID,
0xff, 4);
break;
UT_LIST_REMOVE(list, buf_pool->zip_clean, bpage);
mutex_exit(&buf_pool->zip_mutex);
buf_buddy_free(bpage->zip.data,
page_zip_get_size(&bpage->zip));
buf_buddy_free(bpage, sizeof(*bpage));
return(BUF_BLOCK_ZIP_FREE);
case BUF_BLOCK_FILE_PAGE:
memset(((buf_block_t*) bpage)->frame
+ FIL_PAGE_OFFSET, 0xff, 4);
......@@ -1061,10 +1106,12 @@ buf_LRU_block_remove_hashed_page(
+ FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, 0xff, 4);
buf_page_set_state(bpage, BUF_BLOCK_REMOVE_HASH);
if (bpage->zip.data) {
if (zip && bpage->zip.data) {
/* Free the compressed page. */
ut_free(bpage->zip.data);
void* data = bpage->zip.data;
bpage->zip.data = NULL;
buf_buddy_free(data, page_zip_get_size(&bpage->zip));
page_zip_set_size(&bpage->zip, 0);
}
......@@ -1089,16 +1136,16 @@ static
void
buf_LRU_block_free_hashed_page(
/*===========================*/
buf_page_t* bpage) /* in: block, must contain a file page and
buf_block_t* block) /* in: block, must contain a file page and
be in a state where it can be freed */
{
#ifdef UNIV_SYNC_DEBUG
ut_ad(mutex_own(&(buf_pool->mutex)));
ut_ad(mutex_own(buf_page_get_mutex(bpage)));
ut_ad(mutex_own(&block->mutex));
#endif /* UNIV_SYNC_DEBUG */
buf_page_set_state(bpage, BUF_BLOCK_MEMORY);
buf_block_set_state(block, BUF_BLOCK_MEMORY);
buf_LRU_block_free_non_file_page((buf_block_t*) bpage);
buf_LRU_block_free_non_file_page(block);
}
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment