Commit f890ec15 authored by marko

branches/zip: Use the buddy allocator for managing compressed pages.

There is something wrong with the management of compressed-only blocks
(BUF_BLOCK_ZIP_PAGE).  To disable the creation of such blocks, set zip=TRUE
in buf_LRU_block_remove_hashed_page().

buf_LRU_block_remove_hashed_page(): Release buf_pool->zip_mutex when
freeing a compressed-only page and its control block, with buf_buddy_free().
Adapt callers.

buf_LRU_block_free_hashed_page(): Change the parameter type from buf_page_t*
to buf_block_t*.

buf_LRU_free_block(): Move below the definition of buf_LRU_add_block_low().
Allocate block descriptor for compressed-only blocks.

buf_LRU_block_free_non_file_page(): Replace ut_free() with buf_buddy_free().

buf_zip_decompress(): New function, split from buf_page_io_complete().

buf_page_init_for_read(): Do not allocate the compressed page until it is
really needed, to avoid the overhead of the buddy allocator.  Decompress
compressed-only blocks that are needed again.

buf_page_create(): Do not allocate the compressed page until it is
really needed, to avoid the overhead of the buddy allocator.

buf_validate(): Replace some equality tests on the lengths of the LRU
list and the flush lists with greater-or-equal tests, since the counted
numbers do not include control blocks for compressed-only pages.
parent 0b89bb79
...@@ -724,7 +724,7 @@ buf_chunk_contains_zip( ...@@ -724,7 +724,7 @@ buf_chunk_contains_zip(
#ifdef UNIV_SYNC_DEBUG #ifdef UNIV_SYNC_DEBUG
ut_a(mutex_own(&buf_pool->mutex)); ut_a(mutex_own(&buf_pool->mutex));
#endif /* UNIV_SYNC_DEBUG */ #endif /* UNIV_SYNC_DEBUG */
block = chunk->blocks; block = chunk->blocks;
for (i = chunk->size; i--; block++) { for (i = chunk->size; i--; block++) {
...@@ -1974,6 +1974,53 @@ buf_page_init( ...@@ -1974,6 +1974,53 @@ buf_page_init(
#endif /* UNIV_DEBUG_FILE_ACCESSES */ #endif /* UNIV_DEBUG_FILE_ACCESSES */
} }
/************************************************************************
Produce the uncompressed frame of a compressed block.

The page type is read from the compressed copy (block->page.zip.data):
- FIL_PAGE_INDEX pages are decompressed into block->frame with
  page_zip_decompress();
- the other page types listed below are copied verbatim to the start
  of block->frame (buf_block_get_zip_size(block) bytes);
- any other page type is reported on stderr and treated as a failure.

The block must have a nonzero compressed page size (debug assertion)
and must not belong to tablespace 0 (hard assertion). */
static
ibool
buf_zip_decompress(
/*===============*/
				/* out: TRUE if successful, FALSE if
				decompression failed or the page type
				is not recognized */
	buf_block_t*	block)	/* in/out: block; block->page.zip is
				the input, block->frame is written */
{
	const byte*	frame = block->page.zip.data;

	ut_ad(buf_block_get_zip_size(block));
	ut_a(buf_block_get_space(block) != 0);

	switch (fil_page_get_type(frame)) {
	case FIL_PAGE_INDEX:
		if (page_zip_decompress(&block->page.zip,
					block->frame)) {
			return(TRUE);
		}

		fprintf(stderr,
			"InnoDB: unable to decompress space %lu page %lu\n",
			(ulong) block->page.space,
			(ulong) block->page.offset);
		return(FALSE);

	case FIL_PAGE_TYPE_ALLOCATED:
	case FIL_PAGE_INODE:
	case FIL_PAGE_IBUF_BITMAP:
	case FIL_PAGE_TYPE_FSP_HDR:
	case FIL_PAGE_TYPE_XDES:
	case FIL_PAGE_TYPE_ZBLOB:
		/* Copy to uncompressed storage. */
		memcpy(block->frame, frame,
		       buf_block_get_zip_size(block));
		return(TRUE);
	}

	/* Unrecognized page type: report it and fail.  The explicit
	cast keeps the argument in agreement with the %lu conversion,
	as is done for the other diagnostic in this function. */
	ut_print_timestamp(stderr);
	fprintf(stderr,
		"InnoDB: unknown compressed page"
		" type %lu\n",
		(ulong) fil_page_get_type(frame));
	return(FALSE);
}
/************************************************************************ /************************************************************************
Function which inits a page for read to the buffer buf_pool. If the page is Function which inits a page for read to the buffer buf_pool. If the page is
(1) already in buf_pool, or (1) already in buf_pool, or
...@@ -2025,7 +2072,7 @@ buf_page_init_for_read( ...@@ -2025,7 +2072,7 @@ buf_page_init_for_read(
ut_ad(mode == BUF_READ_ANY_PAGE); ut_ad(mode == BUF_READ_ANY_PAGE);
} }
block = buf_LRU_get_free_block(zip_size); block = buf_LRU_get_free_block(0);
ut_a(block); ut_a(block);
...@@ -2058,8 +2105,6 @@ buf_page_init_for_read( ...@@ -2058,8 +2105,6 @@ buf_page_init_for_read(
/* Move the compressed page from bpage to block, /* Move the compressed page from bpage to block,
and uncompress it. */ and uncompress it. */
buf_buddy_free(block->page.zip.data, zip_size);
mutex_enter(&buf_pool->zip_mutex); mutex_enter(&buf_pool->zip_mutex);
memcpy(&block->page, bpage, sizeof *bpage); memcpy(&block->page, bpage, sizeof *bpage);
block->page.state = BUF_BLOCK_FILE_PAGE; block->page.state = BUF_BLOCK_FILE_PAGE;
...@@ -2093,8 +2138,18 @@ buf_page_init_for_read( ...@@ -2093,8 +2138,18 @@ buf_page_init_for_read(
buf_buddy_free(bpage, sizeof *bpage); buf_buddy_free(bpage, sizeof *bpage);
mutex_exit(&block->mutex);
mutex_exit(&buf_pool->zip_mutex); mutex_exit(&buf_pool->zip_mutex);
break; mutex_exit(&buf_pool->mutex);
if (mode == BUF_READ_IBUF_PAGES_ONLY) {
mtr_commit(&mtr);
}
buf_zip_decompress(block);
return(NULL);
case BUF_BLOCK_FILE_PAGE: case BUF_BLOCK_FILE_PAGE:
break; break;
case BUF_BLOCK_ZIP_FREE: case BUF_BLOCK_ZIP_FREE:
...@@ -2126,6 +2181,11 @@ buf_page_init_for_read( ...@@ -2126,6 +2181,11 @@ buf_page_init_for_read(
ut_ad(block); ut_ad(block);
if (zip_size) {
page_zip_set_size(&block->page.zip, zip_size);
block->page.zip.data = buf_buddy_alloc(zip_size, TRUE);
}
buf_page_init(space, offset, block); buf_page_init(space, offset, block);
/* The block must be put to the LRU list, to the old blocks */ /* The block must be put to the LRU list, to the old blocks */
...@@ -2179,7 +2239,7 @@ buf_page_create( ...@@ -2179,7 +2239,7 @@ buf_page_create(
ut_ad(mtr); ut_ad(mtr);
ut_ad(space || !zip_size); ut_ad(space || !zip_size);
free_block = buf_LRU_get_free_block(zip_size); free_block = buf_LRU_get_free_block(0);
mutex_enter(&(buf_pool->mutex)); mutex_enter(&(buf_pool->mutex));
...@@ -2214,6 +2274,11 @@ buf_page_create( ...@@ -2214,6 +2274,11 @@ buf_page_create(
block = free_block; block = free_block;
if (zip_size) {
page_zip_set_size(&block->page.zip, zip_size);
block->page.zip.data = buf_buddy_alloc(zip_size, TRUE);
}
mutex_enter(&block->mutex); mutex_enter(&block->mutex);
buf_page_init(space, offset, block); buf_page_init(space, offset, block);
...@@ -2293,36 +2358,10 @@ buf_page_io_complete( ...@@ -2293,36 +2358,10 @@ buf_page_io_complete(
byte* frame; byte* frame;
if (buf_block_get_zip_size(block)) { if (buf_block_get_zip_size(block)) {
ut_a(buf_block_get_space(block) != 0);
frame = block->page.zip.data; frame = block->page.zip.data;
switch (fil_page_get_type(frame)) { if (!buf_zip_decompress(block)) {
case FIL_PAGE_INDEX:
if (block->frame) {
if (!page_zip_decompress(
&block->page.zip,
block->frame)) {
goto corrupt;
}
}
break;
case FIL_PAGE_TYPE_ALLOCATED:
case FIL_PAGE_INODE:
case FIL_PAGE_IBUF_BITMAP:
case FIL_PAGE_TYPE_FSP_HDR:
case FIL_PAGE_TYPE_XDES:
case FIL_PAGE_TYPE_ZBLOB:
/* Copy to uncompressed storage. */
memcpy(block->frame, frame,
buf_block_get_zip_size(block));
break;
default:
ut_print_timestamp(stderr);
fprintf(stderr,
"InnoDB: unknown compressed page"
" type %lu\n",
fil_page_get_type(frame));
goto corrupt; goto corrupt;
} }
} else { } else {
...@@ -2636,7 +2675,7 @@ buf_validate(void) ...@@ -2636,7 +2675,7 @@ buf_validate(void)
ut_error; ut_error;
} }
ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == n_lru); ut_a(UT_LIST_GET_LEN(buf_pool->LRU) >= n_lru);
if (UT_LIST_GET_LEN(buf_pool->free) != n_free) { if (UT_LIST_GET_LEN(buf_pool->free) != n_free) {
fprintf(stderr, "Free list len %lu, free blocks %lu\n", fprintf(stderr, "Free list len %lu, free blocks %lu\n",
(ulong) UT_LIST_GET_LEN(buf_pool->free), (ulong) UT_LIST_GET_LEN(buf_pool->free),
...@@ -2645,9 +2684,9 @@ buf_validate(void) ...@@ -2645,9 +2684,9 @@ buf_validate(void)
} }
ut_a(UT_LIST_GET_LEN(buf_pool->flush_list) == n_flush); ut_a(UT_LIST_GET_LEN(buf_pool->flush_list) == n_flush);
ut_a(buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE] == n_single_flush); ut_a(buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE] >= n_single_flush);
ut_a(buf_pool->n_flush[BUF_FLUSH_LIST] == n_list_flush); ut_a(buf_pool->n_flush[BUF_FLUSH_LIST] >= n_list_flush);
ut_a(buf_pool->n_flush[BUF_FLUSH_LRU] == n_lru_flush); ut_a(buf_pool->n_flush[BUF_FLUSH_LRU] >= n_lru_flush);
mutex_exit(&(buf_pool->mutex)); mutex_exit(&(buf_pool->mutex));
......
...@@ -49,7 +49,9 @@ frames in the buffer pool, we set this to TRUE */ ...@@ -49,7 +49,9 @@ frames in the buffer pool, we set this to TRUE */
ibool buf_lru_switched_on_innodb_mon = FALSE; ibool buf_lru_switched_on_innodb_mon = FALSE;
/********************************************************************** /**********************************************************************
Takes a block out of the LRU list and page hash table. */ Takes a block out of the LRU list and page hash table.
If the block is compressed-only (BUF_BLOCK_ZIP_PAGE),
the object will be freed and buf_pool->zip_mutex will be released. */
static static
enum buf_page_state enum buf_page_state
buf_LRU_block_remove_hashed_page( buf_LRU_block_remove_hashed_page(
...@@ -69,7 +71,7 @@ static ...@@ -69,7 +71,7 @@ static
void void
buf_LRU_block_free_hashed_page( buf_LRU_block_free_hashed_page(
/*===========================*/ /*===========================*/
buf_page_t* block); /* in: block, must contain a file page and buf_block_t* block); /* in: block, must contain a file page and
be in a state where it can be freed */ be in a state where it can be freed */
/********************************************************************** /**********************************************************************
...@@ -146,11 +148,15 @@ buf_LRU_invalidate_tablespace( ...@@ -146,11 +148,15 @@ buf_LRU_invalidate_tablespace(
/* Remove from the LRU list */ /* Remove from the LRU list */
if (buf_LRU_block_remove_hashed_page(bpage, TRUE) if (buf_LRU_block_remove_hashed_page(bpage, TRUE)
!= BUF_BLOCK_ZIP_FREE) { != BUF_BLOCK_ZIP_FREE) {
buf_LRU_block_free_hashed_page(bpage); buf_LRU_block_free_hashed_page((buf_block_t*)
bpage);
} else {
goto next_page2;
} }
} }
next_page: next_page:
mutex_exit(block_mutex); mutex_exit(block_mutex);
next_page2:
bpage = prev_bpage; bpage = prev_bpage;
} }
...@@ -232,59 +238,6 @@ buf_LRU_insert_zip_clean( ...@@ -232,59 +238,6 @@ buf_LRU_insert_zip_clean(
} }
} }
/**********************************************************************
Attempt to take a block off the LRU list and hand it back for reuse.
The caller holds buf_pool->mutex and the mutex of the block; both are
temporarily released while a possible adaptive hash index on the page
is dropped, and are held again on return. */
ibool
buf_LRU_free_block(
/*===============*/
				/* out: TRUE if freed */
	buf_page_t*	bpage,	/* in: block to be freed */
	ibool		zip)	/* in: TRUE if should remove also the
				compressed page of an uncompressed page */
{
	mutex_t*	page_mutex = buf_page_get_mutex(bpage);

#ifdef UNIV_SYNC_DEBUG
	ut_ad(mutex_own(&buf_pool->mutex));
	ut_ad(mutex_own(page_mutex));
#endif /* UNIV_SYNC_DEBUG */
	ut_ad(buf_page_in_file(bpage));
	ut_ad(bpage->in_LRU_list);

	/* A block that cannot be replaced right now is left alone. */
	if (!buf_flush_ready_for_replace(bpage)) {

		return(FALSE);
	}

#ifdef UNIV_DEBUG
	if (buf_debug_prints) {
		fprintf(stderr, "Putting space %lu page %lu to free list\n",
			(ulong) buf_page_get_space(bpage),
			(ulong) buf_page_get_page_no(bpage));
	}
#endif /* UNIV_DEBUG */

	/* Nothing more to do when the removal reports
	BUF_BLOCK_ZIP_FREE. */
	if (buf_LRU_block_remove_hashed_page(bpage, zip)
	    == BUF_BLOCK_ZIP_FREE) {

		return(TRUE);
	}

	/* Drop both mutexes while the adaptive hash index entries
	for the page are removed. */
	mutex_exit(&buf_pool->mutex);
	mutex_exit(page_mutex);

	btr_search_drop_page_hash_index((buf_block_t*) bpage);
	ut_a(bpage->buf_fix_count == 0);

	/* Take the mutexes again, in the same order, before the
	block is put to the free list. */
	mutex_enter(&buf_pool->mutex);
	mutex_enter(page_mutex);

	buf_LRU_block_free_hashed_page(bpage);

	return(TRUE);
}
/********************************************************************** /**********************************************************************
Look for a replaceable block from the end of the LRU list and put it to Look for a replaceable block from the end of the LRU list and put it to
the free list if found. */ the free list if found. */
...@@ -531,8 +484,7 @@ buf_LRU_get_free_block( ...@@ -531,8 +484,7 @@ buf_LRU_get_free_block(
if (zip_size) { if (zip_size) {
page_zip_set_size(&block->page.zip, zip_size); page_zip_set_size(&block->page.zip, zip_size);
/* TODO: allocate zip from an aligned pool */ block->page.zip.data = buf_buddy_alloc(zip_size, TRUE);
block->page.zip.data = ut_malloc(zip_size);
} else { } else {
page_zip_set_size(&block->page.zip, 0); page_zip_set_size(&block->page.zip, 0);
block->page.zip.data = NULL; block->page.zip.data = NULL;
...@@ -919,6 +871,87 @@ buf_LRU_make_block_old( ...@@ -919,6 +871,87 @@ buf_LRU_make_block_old(
buf_LRU_add_block_to_end_low(bpage); buf_LRU_add_block_to_end_low(bpage);
} }
/**********************************************************************
Try to free a block.
If the block is not ready for replacement, nothing is done and FALSE
is returned.  Otherwise the block is removed from the LRU list and the
page hash.  For an uncompressed block whose compressed page is still
attached after the removal, a separate block descriptor is allocated
so that the compressed page can stay in the buffer pool as a
compressed-only (BUF_BLOCK_ZIP_PAGE) block.
The caller must hold buf_pool->mutex and the block mutex
(debug assertions below). */
ibool
buf_LRU_free_block(
/*===============*/
/* out: TRUE if freed */
buf_page_t* bpage, /* in: block to be freed */
ibool zip) /* in: TRUE if should remove also the
compressed page of an uncompressed page */
{
/* Fetched up front: bpage itself may no longer exist after
buf_LRU_block_remove_hashed_page() returns BUF_BLOCK_ZIP_FREE. */
mutex_t* block_mutex = buf_page_get_mutex(bpage);
#ifdef UNIV_SYNC_DEBUG
ut_ad(mutex_own(&buf_pool->mutex));
ut_ad(mutex_own(block_mutex));
#endif /* UNIV_SYNC_DEBUG */
ut_ad(buf_page_in_file(bpage));
ut_ad(bpage->in_LRU_list);
if (!buf_flush_ready_for_replace(bpage)) {
return(FALSE);
}
#ifdef UNIV_DEBUG
if (buf_debug_prints) {
fprintf(stderr, "Putting space %lu page %lu to free list\n",
(ulong) buf_page_get_space(bpage),
(ulong) buf_page_get_page_no(bpage));
}
#endif /* UNIV_DEBUG */
if (buf_LRU_block_remove_hashed_page(bpage, zip)
!= BUF_BLOCK_ZIP_FREE) {
/* Release both mutexes while the adaptive hash index
is being dropped, then take them again in the same order. */
mutex_exit(&(buf_pool->mutex));
mutex_exit(block_mutex);
/* Remove possible adaptive hash index on the page */
btr_search_drop_page_hash_index((buf_block_t*) bpage);
ut_a(bpage->buf_fix_count == 0);
mutex_enter(&(buf_pool->mutex));
mutex_enter(block_mutex);
if (bpage->zip.data) {
/* Keep the compressed page.
Allocate a block descriptor for it. */
buf_page_t* b = buf_buddy_alloc(sizeof *b, FALSE);
/* If no descriptor could be allocated, the compressed
page stays attached to bpage and is passed on to
buf_LRU_block_free_hashed_page() together with it. */
if (b) {
ulint fold;
/* Clone the descriptor, mark the copy as
compressed-only, and make it reachable through
the page hash, the LRU list and the zip_clean list. */
memcpy(b, bpage, sizeof *b);
b->state = BUF_BLOCK_ZIP_PAGE;
fold = buf_page_address_fold(b->space,
b->offset);
HASH_INSERT(buf_page_t, hash,
buf_pool->page_hash, fold, b);
buf_LRU_add_block_low(b, TRUE);
buf_LRU_insert_zip_clean(b);
/* The compressed page now belongs to b; detach it
from bpage before bpage is freed. */
bpage->zip.data = NULL;
page_zip_set_size(&bpage->zip, 0);
}
}
buf_LRU_block_free_hashed_page((buf_block_t*) bpage);
} else {
/* BUF_BLOCK_ZIP_FREE: the compressed-only block was freed and
buf_pool->zip_mutex was released by
buf_LRU_block_remove_hashed_page().  Re-acquire the mutex;
presumably the caller expects to still hold the block mutex
on return -- confirm against callers. */
mutex_enter(block_mutex);
}
return(TRUE);
}
/********************************************************************** /**********************************************************************
Puts a block back to the free list. */ Puts a block back to the free list. */
...@@ -927,6 +960,7 @@ buf_LRU_block_free_non_file_page( ...@@ -927,6 +960,7 @@ buf_LRU_block_free_non_file_page(
/*=============================*/ /*=============================*/
buf_block_t* block) /* in: block, must not contain a file page */ buf_block_t* block) /* in: block, must not contain a file page */
{ {
void* data;
#ifdef UNIV_SYNC_DEBUG #ifdef UNIV_SYNC_DEBUG
ut_ad(mutex_own(&(buf_pool->mutex))); ut_ad(mutex_own(&(buf_pool->mutex)));
ut_ad(mutex_own(&block->mutex)); ut_ad(mutex_own(&block->mutex));
...@@ -954,10 +988,11 @@ buf_LRU_block_free_non_file_page( ...@@ -954,10 +988,11 @@ buf_LRU_block_free_non_file_page(
memset(block->frame + FIL_PAGE_OFFSET, 0xfe, 4); memset(block->frame + FIL_PAGE_OFFSET, 0xfe, 4);
memset(block->frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, 0xfe, 4); memset(block->frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, 0xfe, 4);
#endif #endif
if (block->page.zip.data) { data = block->page.zip.data;
/* TODO: return zip to an aligned pool */
ut_free(block->page.zip.data); if (data) {
block->page.zip.data = NULL; block->page.zip.data = NULL;
buf_buddy_free(data, page_zip_get_size(&block->page.zip));
page_zip_set_size(&block->page.zip, 0); page_zip_set_size(&block->page.zip, 0);
} }
...@@ -968,7 +1003,9 @@ buf_LRU_block_free_non_file_page( ...@@ -968,7 +1003,9 @@ buf_LRU_block_free_non_file_page(
} }
/********************************************************************** /**********************************************************************
Takes a block out of the LRU list and page hash table. */ Takes a block out of the LRU list and page hash table.
If the block is compressed-only (BUF_BLOCK_ZIP_PAGE),
the object will be freed and buf_pool->zip_mutex will be released. */
static static
enum buf_page_state enum buf_page_state
buf_LRU_block_remove_hashed_page( buf_LRU_block_remove_hashed_page(
...@@ -1053,7 +1090,15 @@ buf_LRU_block_remove_hashed_page( ...@@ -1053,7 +1090,15 @@ buf_LRU_block_remove_hashed_page(
memset(bpage->zip.data + FIL_PAGE_OFFSET, 0xff, 4); memset(bpage->zip.data + FIL_PAGE_OFFSET, 0xff, 4);
memset(bpage->zip.data + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, memset(bpage->zip.data + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID,
0xff, 4); 0xff, 4);
break;
UT_LIST_REMOVE(list, buf_pool->zip_clean, bpage);
mutex_exit(&buf_pool->zip_mutex);
buf_buddy_free(bpage->zip.data,
page_zip_get_size(&bpage->zip));
buf_buddy_free(bpage, sizeof(*bpage));
return(BUF_BLOCK_ZIP_FREE);
case BUF_BLOCK_FILE_PAGE: case BUF_BLOCK_FILE_PAGE:
memset(((buf_block_t*) bpage)->frame memset(((buf_block_t*) bpage)->frame
+ FIL_PAGE_OFFSET, 0xff, 4); + FIL_PAGE_OFFSET, 0xff, 4);
...@@ -1061,10 +1106,12 @@ buf_LRU_block_remove_hashed_page( ...@@ -1061,10 +1106,12 @@ buf_LRU_block_remove_hashed_page(
+ FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, 0xff, 4); + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, 0xff, 4);
buf_page_set_state(bpage, BUF_BLOCK_REMOVE_HASH); buf_page_set_state(bpage, BUF_BLOCK_REMOVE_HASH);
if (bpage->zip.data) { if (zip && bpage->zip.data) {
/* Free the compressed page. */ /* Free the compressed page. */
ut_free(bpage->zip.data); void* data = bpage->zip.data;
bpage->zip.data = NULL; bpage->zip.data = NULL;
buf_buddy_free(data, page_zip_get_size(&bpage->zip));
page_zip_set_size(&bpage->zip, 0); page_zip_set_size(&bpage->zip, 0);
} }
...@@ -1089,16 +1136,16 @@ static ...@@ -1089,16 +1136,16 @@ static
void void
buf_LRU_block_free_hashed_page( buf_LRU_block_free_hashed_page(
/*===========================*/ /*===========================*/
buf_page_t* bpage) /* in: block, must contain a file page and buf_block_t* block) /* in: block, must contain a file page and
be in a state where it can be freed */ be in a state where it can be freed */
{ {
#ifdef UNIV_SYNC_DEBUG #ifdef UNIV_SYNC_DEBUG
ut_ad(mutex_own(&(buf_pool->mutex))); ut_ad(mutex_own(&(buf_pool->mutex)));
ut_ad(mutex_own(buf_page_get_mutex(bpage))); ut_ad(mutex_own(&block->mutex));
#endif /* UNIV_SYNC_DEBUG */ #endif /* UNIV_SYNC_DEBUG */
buf_page_set_state(bpage, BUF_BLOCK_MEMORY); buf_block_set_state(block, BUF_BLOCK_MEMORY);
buf_LRU_block_free_non_file_page((buf_block_t*) bpage); buf_LRU_block_free_non_file_page(block);
} }
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment