From 39e30c0b9b3bc2611f5e2f9b2074cf33025bda0b Mon Sep 17 00:00:00 2001
From: marko <>
Date: Wed, 3 Jan 2007 13:10:46 +0000
Subject: [PATCH] branches/zip: Use the buddy allocator for managing compressed
 pages. There is something wrong with the management of compressed-only blocks
 (BUF_BLOCK_ZIP_PAGE).  To disable the creation of such blocks, set zip=TRUE
 in buf_LRU_block_remove_hashed_page().

buf_LRU_block_remove_hashed_page(): Release buf_pool->zip_mutex when
freeing a compressed-only page and its control block, with buf_buddy_free().
Adapt callers.

buf_LRU_block_free_hashed_page(): Change the parameter type from buf_page_t*
to buf_block_t*.

buf_LRU_free_block(): Move below the definition of buf_LRU_add_block_low().
Allocate block descriptor for compressed-only blocks.

buf_LRU_block_free_non_file_page(): Replace ut_free() with buf_buddy_free().

buf_zip_decompress(): New function, split from buf_page_io_complete().

buf_page_init_for_read(): Do not allocate the compressed page until it is
really needed, to avoid the overhead of the buddy allocator.  Decompress
compressed-only blocks that are needed again.

buf_page_create(): Do not allocate the compressed page until it is
really needed, to avoid the overhead of the buddy allocator.

buf_validate(): Replace the equality tests on the length of the LRU list
and on the buf_pool->n_flush[] counters with greater-or-equal tests, since
the counted numbers do not include control blocks for compressed-only pages.
---
 buf/buf0buf.c | 115 ++++++++++++++++++++-----------
 buf/buf0lru.c | 185 +++++++++++++++++++++++++++++++-------------------
 2 files changed, 193 insertions(+), 107 deletions(-)

diff --git a/buf/buf0buf.c b/buf/buf0buf.c
index 3191169593c..cbeb14af178 100644
--- a/buf/buf0buf.c
+++ b/buf/buf0buf.c
@@ -724,7 +724,7 @@ buf_chunk_contains_zip(
 #ifdef UNIV_SYNC_DEBUG
 	ut_a(mutex_own(&buf_pool->mutex));
 #endif /* UNIV_SYNC_DEBUG */
- 
+
 	block = chunk->blocks;
 
 	for (i = chunk->size; i--; block++) {
@@ -1974,6 +1974,53 @@ buf_page_init(
 #endif /* UNIV_DEBUG_FILE_ACCESSES */
 }
 
+/************************************************************************
+Fills block->frame from the compressed page block->page.zip.data: index
+pages are decompressed; the other known page types are copied verbatim.
+Failures are reported on stderr. */
+static
+ibool
+buf_zip_decompress(
+/*===============*/
+				/* out: TRUE if successful */
+	buf_block_t*	block)	/* in/out: block; block->frame is written */
+{
+	const byte* frame = block->page.zip.data;
+
+	ut_ad(buf_block_get_zip_size(block));
+	ut_a(buf_block_get_space(block) != 0);
+
+	switch (fil_page_get_type(frame)) {
+	case FIL_PAGE_INDEX:
+		if (page_zip_decompress(&block->page.zip, block->frame)) {
+			return(TRUE);
+		}
+
+		fprintf(stderr,
+			"InnoDB: unable to decompress space %lu page %lu\n",
+			(ulong) block->page.space,
+			(ulong) block->page.offset);
+		return(FALSE);
+
+	case FIL_PAGE_TYPE_ALLOCATED:
+	case FIL_PAGE_INODE:
+	case FIL_PAGE_IBUF_BITMAP:
+	case FIL_PAGE_TYPE_FSP_HDR:
+	case FIL_PAGE_TYPE_XDES:
+	case FIL_PAGE_TYPE_ZBLOB:
+		/* Copy to uncompressed storage. */
+		memcpy(block->frame, frame, buf_block_get_zip_size(block));
+		return(TRUE);
+	}
+
+	/* Any other page type is unexpected and is reported as a failure.
+	Cast for %lu, like the other fprintf() arguments in this file. */
+	ut_print_timestamp(stderr);
+	fprintf(stderr, "InnoDB: unknown compressed page type %lu\n",
+		(ulong) fil_page_get_type(frame));
+	return(FALSE);
+}
+
 /************************************************************************
 Function which inits a page for read to the buffer buf_pool. If the page is
 (1) already in buf_pool, or
@@ -2025,7 +2072,7 @@ buf_page_init_for_read(
 		ut_ad(mode == BUF_READ_ANY_PAGE);
 	}
 
-	block = buf_LRU_get_free_block(zip_size);
+	block = buf_LRU_get_free_block(0);
 
 	ut_a(block);
 
@@ -2058,8 +2105,6 @@ buf_page_init_for_read(
 			/* Move the compressed page from bpage to block,
 			and uncompress it. */
 
-			buf_buddy_free(block->page.zip.data, zip_size);
-
 			mutex_enter(&buf_pool->zip_mutex);
 			memcpy(&block->page, bpage, sizeof *bpage);
 			block->page.state = BUF_BLOCK_FILE_PAGE;
@@ -2093,8 +2138,18 @@ buf_page_init_for_read(
 
 			buf_buddy_free(bpage, sizeof *bpage);
 
+			mutex_exit(&block->mutex);
 			mutex_exit(&buf_pool->zip_mutex);
-			break;
+			mutex_exit(&buf_pool->mutex);
+
+			if (mode == BUF_READ_IBUF_PAGES_ONLY) {
+
+				mtr_commit(&mtr);
+			}
+
+			buf_zip_decompress(block);
+
+			return(NULL);
 		case BUF_BLOCK_FILE_PAGE:
 			break;
 		case BUF_BLOCK_ZIP_FREE:
@@ -2126,6 +2181,11 @@ buf_page_init_for_read(
 
 	ut_ad(block);
 
+	if (zip_size) {
+		page_zip_set_size(&block->page.zip, zip_size);
+		block->page.zip.data = buf_buddy_alloc(zip_size, TRUE);
+	}
+
 	buf_page_init(space, offset, block);
 
 	/* The block must be put to the LRU list, to the old blocks */
@@ -2179,7 +2239,7 @@ buf_page_create(
 	ut_ad(mtr);
 	ut_ad(space || !zip_size);
 
-	free_block = buf_LRU_get_free_block(zip_size);
+	free_block = buf_LRU_get_free_block(0);
 
 	mutex_enter(&(buf_pool->mutex));
 
@@ -2214,6 +2274,11 @@ buf_page_create(
 
 	block = free_block;
 
+	if (zip_size) {
+		page_zip_set_size(&block->page.zip, zip_size);
+		block->page.zip.data = buf_buddy_alloc(zip_size, TRUE);
+	}
+
 	mutex_enter(&block->mutex);
 
 	buf_page_init(space, offset, block);
@@ -2293,36 +2358,10 @@ buf_page_io_complete(
 		byte*	frame;
 
 		if (buf_block_get_zip_size(block)) {
-			ut_a(buf_block_get_space(block) != 0);
-
 			frame = block->page.zip.data;
 
-			switch (fil_page_get_type(frame)) {
-			case FIL_PAGE_INDEX:
-				if (block->frame) {
-					if (!page_zip_decompress(
-						    &block->page.zip,
-						    block->frame)) {
-						goto corrupt;
-					}
-				}
-				break;
-			case FIL_PAGE_TYPE_ALLOCATED:
-			case FIL_PAGE_INODE:
-			case FIL_PAGE_IBUF_BITMAP:
-			case FIL_PAGE_TYPE_FSP_HDR:
-			case FIL_PAGE_TYPE_XDES:
-			case FIL_PAGE_TYPE_ZBLOB:
-				/* Copy to uncompressed storage. */
-				memcpy(block->frame, frame,
-				       buf_block_get_zip_size(block));
-				break;
-			default:
-				ut_print_timestamp(stderr);
-				fprintf(stderr,
-					"InnoDB: unknown compressed page"
-					" type %lu\n",
-					fil_page_get_type(frame));
+			if (!buf_zip_decompress(block)) {
+
 				goto corrupt;
 			}
 		} else {
@@ -2636,7 +2675,7 @@ buf_validate(void)
 		ut_error;
 	}
 
-	ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == n_lru);
+	ut_a(UT_LIST_GET_LEN(buf_pool->LRU) >= n_lru);
 	if (UT_LIST_GET_LEN(buf_pool->free) != n_free) {
 		fprintf(stderr, "Free list len %lu, free blocks %lu\n",
 			(ulong) UT_LIST_GET_LEN(buf_pool->free),
@@ -2645,9 +2684,9 @@ buf_validate(void)
 	}
 	ut_a(UT_LIST_GET_LEN(buf_pool->flush_list) == n_flush);
 
-	ut_a(buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE] == n_single_flush);
-	ut_a(buf_pool->n_flush[BUF_FLUSH_LIST] == n_list_flush);
-	ut_a(buf_pool->n_flush[BUF_FLUSH_LRU] == n_lru_flush);
+	ut_a(buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE] >= n_single_flush);
+	ut_a(buf_pool->n_flush[BUF_FLUSH_LIST] >= n_list_flush);
+	ut_a(buf_pool->n_flush[BUF_FLUSH_LRU] >= n_lru_flush);
 
 	mutex_exit(&(buf_pool->mutex));
 
diff --git a/buf/buf0lru.c b/buf/buf0lru.c
index 6a57aa65bd1..ce3d7da01da 100644
--- a/buf/buf0lru.c
+++ b/buf/buf0lru.c
@@ -49,7 +49,9 @@ frames in the buffer pool, we set this to TRUE */
 ibool	buf_lru_switched_on_innodb_mon	= FALSE;
 
 /**********************************************************************
-Takes a block out of the LRU list and page hash table. */
+Takes a block out of the LRU list and page hash table.
+If the block is compressed-only (BUF_BLOCK_ZIP_PAGE),
+the object will be freed and buf_pool->zip_mutex will be released. */
 static
 enum buf_page_state
 buf_LRU_block_remove_hashed_page(
@@ -69,7 +71,7 @@ static
 void
 buf_LRU_block_free_hashed_page(
 /*===========================*/
-	buf_page_t*	block);	/* in: block, must contain a file page and
+	buf_block_t*	block);	/* in: block, must contain a file page and
 				be in a state where it can be freed */
 
 /**********************************************************************
@@ -146,11 +148,15 @@ buf_LRU_invalidate_tablespace(
 			/* Remove from the LRU list */
 			if (buf_LRU_block_remove_hashed_page(bpage, TRUE)
 			    != BUF_BLOCK_ZIP_FREE) {
-				buf_LRU_block_free_hashed_page(bpage);
+				buf_LRU_block_free_hashed_page((buf_block_t*)
+							       bpage);
+			} else {
+				goto next_page2;
 			}
 		}
 next_page:
 		mutex_exit(block_mutex);
+next_page2:
 		bpage = prev_bpage;
 	}
 
@@ -232,59 +238,6 @@ buf_LRU_insert_zip_clean(
 	}
 }
 
-/**********************************************************************
-Try to free a block. */
-
-ibool
-buf_LRU_free_block(
-/*===============*/
-				/* out: TRUE if freed */
-	buf_page_t*	bpage,	/* in: block to be freed */
-	ibool		zip)	/* in: TRUE if should remove also the
-				compressed page of an uncompressed page */
-{
-	mutex_t*	block_mutex = buf_page_get_mutex(bpage);
-
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(mutex_own(&buf_pool->mutex));
-	ut_ad(mutex_own(block_mutex));
-#endif /* UNIV_SYNC_DEBUG */
-
-	ut_ad(buf_page_in_file(bpage));
-	ut_ad(bpage->in_LRU_list);
-
-	if (!buf_flush_ready_for_replace(bpage)) {
-
-		return(FALSE);
-	}
-
-#ifdef UNIV_DEBUG
-	if (buf_debug_prints) {
-		fprintf(stderr, "Putting space %lu page %lu to free list\n",
-			(ulong) buf_page_get_space(bpage),
-			(ulong) buf_page_get_page_no(bpage));
-	}
-#endif /* UNIV_DEBUG */
-
-	if (buf_LRU_block_remove_hashed_page(bpage, zip)
-	    != BUF_BLOCK_ZIP_FREE) {
-		mutex_exit(&(buf_pool->mutex));
-		mutex_exit(block_mutex);
-
-		/* Remove possible adaptive hash index on the page */
-
-		btr_search_drop_page_hash_index((buf_block_t*) bpage);
-		ut_a(bpage->buf_fix_count == 0);
-
-		mutex_enter(&(buf_pool->mutex));
-		mutex_enter(block_mutex);
-
-		buf_LRU_block_free_hashed_page(bpage);
-	}
-
-	return(TRUE);
-}
-
 /**********************************************************************
 Look for a replaceable block from the end of the LRU list and put it to
 the free list if found. */
@@ -531,8 +484,7 @@ buf_LRU_get_free_block(
 
 		if (zip_size) {
 			page_zip_set_size(&block->page.zip, zip_size);
-			/* TODO: allocate zip from an aligned pool */
-			block->page.zip.data = ut_malloc(zip_size);
+			block->page.zip.data = buf_buddy_alloc(zip_size, TRUE);
 		} else {
 			page_zip_set_size(&block->page.zip, 0);
 			block->page.zip.data = NULL;
@@ -919,6 +871,87 @@ buf_LRU_make_block_old(
 	buf_LRU_add_block_to_end_low(bpage);
 }
 
+/**********************************************************************
+Try to free a block.  The caller must hold buf_pool->mutex and block mutex. */
+
+ibool
+buf_LRU_free_block(
+/*===============*/
+				/* out: TRUE if freed */
+	buf_page_t*	bpage,	/* in: block to be freed */
+	ibool		zip)	/* in: TRUE if should remove also the
+				compressed page of an uncompressed page */
+{
+	mutex_t*	block_mutex = buf_page_get_mutex(bpage);
+
+#ifdef UNIV_SYNC_DEBUG
+	ut_ad(mutex_own(&buf_pool->mutex));
+	ut_ad(mutex_own(block_mutex));
+#endif /* UNIV_SYNC_DEBUG */
+
+	ut_ad(buf_page_in_file(bpage));
+	ut_ad(bpage->in_LRU_list);
+
+	if (!buf_flush_ready_for_replace(bpage)) {
+
+		return(FALSE);
+	}
+
+#ifdef UNIV_DEBUG
+	if (buf_debug_prints) {
+		fprintf(stderr, "Putting space %lu page %lu to free list\n",
+			(ulong) buf_page_get_space(bpage),
+			(ulong) buf_page_get_page_no(bpage));
+	}
+#endif /* UNIV_DEBUG */
+	/* BUF_BLOCK_ZIP_FREE: bpage was freed and zip_mutex was released. */
+	if (buf_LRU_block_remove_hashed_page(bpage, zip)
+	    != BUF_BLOCK_ZIP_FREE) {
+		mutex_exit(&(buf_pool->mutex));
+		mutex_exit(block_mutex);
+
+		/* Remove possible adaptive hash index on the page */
+
+		btr_search_drop_page_hash_index((buf_block_t*) bpage);
+		ut_a(bpage->buf_fix_count == 0);
+
+		mutex_enter(&(buf_pool->mutex));
+		mutex_enter(block_mutex);
+
+		if (bpage->zip.data) {
+			/* Keep the compressed page as a BUF_BLOCK_ZIP_PAGE:
+			allocate a separate block descriptor for it. */
+			buf_page_t* b = buf_buddy_alloc(sizeof *b, FALSE);
+
+			if (b) {
+				ulint	fold;
+
+				memcpy(b, bpage, sizeof *b);
+				b->state = BUF_BLOCK_ZIP_PAGE;
+
+				fold = buf_page_address_fold(b->space,
+							     b->offset);
+
+				HASH_INSERT(buf_page_t, hash,
+					    buf_pool->page_hash, fold, b);
+
+				buf_LRU_add_block_low(b, TRUE);
+
+				buf_LRU_insert_zip_clean(b);
+				/* The compressed page now belongs to b. */
+				bpage->zip.data = NULL;
+				page_zip_set_size(&bpage->zip, 0);
+			}
+		}
+		/* Frees bpage->zip.data too, if it is still set. */
+		buf_LRU_block_free_hashed_page((buf_block_t*) bpage);
+	} else {
+		mutex_enter(block_mutex);
+	}
+
+	return(TRUE);
+}
+
 /**********************************************************************
 Puts a block back to the free list. */
 
@@ -927,6 +960,7 @@ buf_LRU_block_free_non_file_page(
 /*=============================*/
 	buf_block_t*	block)	/* in: block, must not contain a file page */
 {
+	void*	data;
 #ifdef UNIV_SYNC_DEBUG
 	ut_ad(mutex_own(&(buf_pool->mutex)));
 	ut_ad(mutex_own(&block->mutex));
@@ -954,10 +988,11 @@ buf_LRU_block_free_non_file_page(
 	memset(block->frame + FIL_PAGE_OFFSET, 0xfe, 4);
 	memset(block->frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, 0xfe, 4);
 #endif
-	if (block->page.zip.data) {
-		/* TODO: return zip to an aligned pool */
-		ut_free(block->page.zip.data);
+	data = block->page.zip.data;
+
+	if (data) {
 		block->page.zip.data = NULL;
+		buf_buddy_free(data, page_zip_get_size(&block->page.zip));
 		page_zip_set_size(&block->page.zip, 0);
 	}
 
@@ -968,7 +1003,9 @@ buf_LRU_block_free_non_file_page(
 }
 
 /**********************************************************************
-Takes a block out of the LRU list and page hash table. */
+Takes a block out of the LRU list and page hash table.
+If the block is compressed-only (BUF_BLOCK_ZIP_PAGE),
+the object will be freed and buf_pool->zip_mutex will be released. */
 static
 enum buf_page_state
 buf_LRU_block_remove_hashed_page(
@@ -1053,7 +1090,15 @@ buf_LRU_block_remove_hashed_page(
 		memset(bpage->zip.data + FIL_PAGE_OFFSET, 0xff, 4);
 		memset(bpage->zip.data + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID,
 		       0xff, 4);
-		break;
+
+		UT_LIST_REMOVE(list, buf_pool->zip_clean, bpage);
+
+		mutex_exit(&buf_pool->zip_mutex);
+		buf_buddy_free(bpage->zip.data,
+			       page_zip_get_size(&bpage->zip));
+		buf_buddy_free(bpage, sizeof(*bpage));
+		return(BUF_BLOCK_ZIP_FREE);
+
 	case BUF_BLOCK_FILE_PAGE:
 		memset(((buf_block_t*) bpage)->frame
 		       + FIL_PAGE_OFFSET, 0xff, 4);
@@ -1061,10 +1106,12 @@ buf_LRU_block_remove_hashed_page(
 		       + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, 0xff, 4);
 		buf_page_set_state(bpage, BUF_BLOCK_REMOVE_HASH);
 
-		if (bpage->zip.data) {
+		if (zip && bpage->zip.data) {
 			/* Free the compressed page. */
-			ut_free(bpage->zip.data);
+			void*	data = bpage->zip.data;
 			bpage->zip.data = NULL;
+
+			buf_buddy_free(data, page_zip_get_size(&bpage->zip));
 			page_zip_set_size(&bpage->zip, 0);
 		}
 
@@ -1089,16 +1136,16 @@ static
 void
 buf_LRU_block_free_hashed_page(
 /*===========================*/
-	buf_page_t*	bpage)	/* in: block, must contain a file page and
+	buf_block_t*	block)	/* in: block, must contain a file page and
 				be in a state where it can be freed */
 {
 #ifdef UNIV_SYNC_DEBUG
 	ut_ad(mutex_own(&(buf_pool->mutex)));
-	ut_ad(mutex_own(buf_page_get_mutex(bpage)));
+	ut_ad(mutex_own(&block->mutex));
 #endif /* UNIV_SYNC_DEBUG */
-	buf_page_set_state(bpage, BUF_BLOCK_MEMORY);
+	buf_block_set_state(block, BUF_BLOCK_MEMORY);
 
-	buf_LRU_block_free_non_file_page((buf_block_t*) bpage);
+	buf_LRU_block_free_non_file_page(block);
 }
 
 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
-- 
2.30.9