Commit 9d58e3ff authored by marko

branches/zip: Improve the LRU algorithm with a separate unzip_LRU list of
blocks that contain both uncompressed and compressed frames.  This patch was
designed by Heikki and Inaam, implemented by Inaam, and refined and reviewed
by Marko and Sunny.

buf_buddy_n_frames, buf_buddy_min_n_frames, buf_buddy_max_n_frames: Remove.

buf_page_belongs_to_unzip_LRU(): New predicate:
bpage->zip.data && buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE.

buf_pool_t, buf_block_t: Add the linked list unzip_LRU.  A block in the
regular LRU list is in unzip_LRU iff buf_page_belongs_to_unzip_LRU() holds.

buf_LRU_free_block(): Add a third return value to refine the case
"cannot free the block".

buf_LRU_search_and_free_block(): Update the documentation to reflect the
implementation.

buf_LRU_stat_t, buf_LRU_stat_cur, buf_LRU_stat_sum, buf_LRU_stat_arr[]:
Statistics for the unzip_LRU algorithm.

buf_LRU_stat_update(): New function: Update the statistics.  Called once
per second by srv_error_monitor_thread().

buf_LRU_validate(): Validate the unzip_LRU list as well.

buf_LRU_evict_from_unzip_LRU(): New predicate: Use the unzip_LRU before
falling back to the regular LRU?

buf_LRU_free_from_unzip_LRU_list(), buf_LRU_free_from_common_LRU_list():
Subfunctions of buf_LRU_search_and_free_block().

buf_LRU_search_and_free_block(): Reimplement.  Try to evict an uncompressed
page from the unzip_LRU list before falling back to evicting an entire block
from the common LRU list.

buf_unzip_LRU_remove_block_if_needed(): New function.

buf_unzip_LRU_add_block(): New function: Add a block to the unzip_LRU list.
parent 149298b7
......@@ -3682,7 +3682,8 @@ btr_blob_free(
&& buf_block_get_space(block) == space
&& buf_block_get_page_no(block) == page_no) {
if (!buf_LRU_free_block(&block->page, all, NULL)
if (buf_LRU_free_block(&block->page, all, NULL)
!= BUF_LRU_FREED
&& all && block->page.zip.data) {
/* Attempt to deallocate the uncompressed page
if the whole block cannot be deallocated. */
......
......@@ -19,9 +19,11 @@ Created December 2006 by Marko Makela
/* Statistic counters */
#ifdef UNIV_DEBUG
/** Number of frames allocated from the buffer pool to the buddy system.
Protected by buf_pool_mutex. */
UNIV_INTERN ulint buf_buddy_n_frames;
static ulint buf_buddy_n_frames;
#endif /* UNIV_DEBUG */
/** Counts of blocks allocated from the buddy system.
Protected by buf_pool_mutex. */
UNIV_INTERN ulint buf_buddy_used[BUF_BUDDY_SIZES + 1];
......@@ -32,17 +34,6 @@ UNIV_INTERN ib_uint64_t buf_buddy_relocated[BUF_BUDDY_SIZES + 1];
Protected by buf_pool_mutex. */
UNIV_INTERN ullint buf_buddy_relocated_duration[BUF_BUDDY_SIZES + 1];
/** Preferred minimum number of frames allocated from the buffer pool
to the buddy system. Unless this number is exceeded or the buffer
pool is scarce, the LRU algorithm will not free compressed-only pages
in order to satisfy an allocation request. Protected by buf_pool_mutex. */
UNIV_INTERN ulint buf_buddy_min_n_frames = 0;
/** Preferred maximum number of frames allocated from the buffer pool
to the buddy system. Unless this number is exceeded, the buddy allocator
will not try to free clean compressed-only pages before falling back
to the LRU algorithm. Protected by buf_pool_mutex. */
UNIV_INTERN ulint buf_buddy_max_n_frames = ULINT_UNDEFINED;
/**************************************************************************
Get the offset of the buddy of a compressed page frame. */
UNIV_INLINE
......@@ -204,7 +195,7 @@ buf_buddy_block_free(
mutex_exit(&block->mutex);
ut_ad(buf_buddy_n_frames > 0);
buf_buddy_n_frames--;
ut_d(buf_buddy_n_frames--);
}
/**************************************************************************
......@@ -229,7 +220,7 @@ buf_buddy_block_register(
ut_d(block->page.in_zip_hash = TRUE);
HASH_INSERT(buf_page_t, hash, buf_pool->zip_hash, fold, &block->page);
buf_buddy_n_frames++;
ut_d(buf_buddy_n_frames++);
}
/**************************************************************************
......
......@@ -133,7 +133,7 @@ There are several lists of control blocks.
The free list (buf_pool->free) contains blocks which are currently not
used.
The LRU-list contains all the blocks holding a file page
The common LRU list contains all the blocks holding a file page
except those for which the bufferfix count is non-zero.
The pages are in the LRU list roughly in the order of the last
access to the page, so that the oldest pages are at the end of the
......@@ -148,6 +148,14 @@ table which cannot fit in the memory. Putting the pages near the
end of the LRU list, we make sure that most of the buf_pool stays in the
main memory, undisturbed.
The unzip_LRU list contains a subset of the common LRU list. The
blocks on the unzip_LRU list hold a compressed file page and the
corresponding uncompressed page frame. A block is in unzip_LRU if and
only if the predicate buf_page_belongs_to_unzip_LRU(&block->page)
holds. The blocks in unzip_LRU will be in the same order as they are in
the common LRU list. That is, each manipulation of the common LRU
list will result in the same manipulation of the unzip_LRU list.
The chain of modified blocks (buf_pool->flush_list) contains the blocks
holding file pages that have been modified in the memory
but not written to disk yet. The block with the oldest modification
......@@ -649,6 +657,7 @@ buf_block_init(
block->page.in_flush_list = FALSE;
block->page.in_free_list = FALSE;
block->page.in_LRU_list = FALSE;
block->in_unzip_LRU_list = FALSE;
block->n_pointers = 0;
#endif /* UNIV_DEBUG */
page_zip_des_init(&block->page.zip);
......@@ -881,6 +890,7 @@ buf_chunk_free(
ut_a(!block->page.zip.data);
ut_ad(!block->page.in_LRU_list);
ut_ad(!block->in_unzip_LRU_list);
ut_ad(!block->page.in_flush_list);
/* Remove the block from the free list. */
ut_ad(block->page.in_free_list);
......@@ -1147,8 +1157,8 @@ shrink_again:
buf_LRU_make_block_old(&block->page);
dirty++;
} else if (!buf_LRU_free_block(&block->page,
TRUE, NULL)) {
} else if (buf_LRU_free_block(&block->page, TRUE, NULL)
!= BUF_LRU_FREED) {
nonfree++;
}
......@@ -1588,7 +1598,8 @@ lookup:
break;
case BUF_BLOCK_FILE_PAGE:
/* Discard the uncompressed page frame if possible. */
if (buf_LRU_free_block(bpage, FALSE, NULL)) {
if (buf_LRU_free_block(bpage, FALSE, NULL)
== BUF_LRU_FREED) {
mutex_exit(block_mutex);
goto lookup;
......@@ -1964,8 +1975,13 @@ wait_until_unfixed:
}
/* Buffer-fix, I/O-fix, and X-latch the block
for the duration of the decompression. */
for the duration of the decompression.
Also add the block to the unzip_LRU list. */
block->page.state = BUF_BLOCK_FILE_PAGE;
/* Insert at the front of unzip_LRU list */
buf_unzip_LRU_add_block(block, FALSE);
block->page.buf_fix_count = 1;
buf_block_set_io_fix(block, BUF_IO_READ);
buf_pool->n_pend_unzip++;
......@@ -2631,6 +2647,14 @@ err_exit2:
data = buf_buddy_alloc(zip_size, &lru);
mutex_enter(&block->mutex);
block->page.zip.data = data;
/* To maintain the invariant
block->in_unzip_LRU_list
== buf_page_belongs_to_unzip_LRU(&block->page)
we have to add this block to unzip_LRU
after block->page.zip.data is set. */
ut_ad(buf_page_belongs_to_unzip_LRU(&block->page));
buf_unzip_LRU_add_block(block, TRUE);
}
mutex_exit(&block->mutex);
......@@ -2794,6 +2818,14 @@ buf_page_create(
mutex_enter(&block->mutex);
block->page.zip.data = data;
/* To maintain the invariant
block->in_unzip_LRU_list
== buf_page_belongs_to_unzip_LRU(&block->page)
we have to add this block to unzip_LRU after
block->page.zip.data is set. */
ut_ad(buf_page_belongs_to_unzip_LRU(&block->page));
buf_unzip_LRU_add_block(block, FALSE);
buf_page_set_io_fix(&block->page, BUF_IO_NONE);
rw_lock_x_unlock(&block->lock);
}
......@@ -3073,6 +3105,7 @@ buf_pool_invalidate(void)
buf_pool_mutex_enter();
ut_ad(UT_LIST_GET_LEN(buf_pool->LRU) == 0);
ut_ad(UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0);
buf_pool_mutex_exit();
}
......@@ -3606,6 +3639,16 @@ buf_print_io(
buf_pool->n_pages_created_old = buf_pool->n_pages_created;
buf_pool->n_pages_written_old = buf_pool->n_pages_written;
/* Print some values to help us with visualizing what is
happening with LRU eviction. */
fprintf(file,
"LRU len: %lu, unzip_LRU len: %lu\n"
"I/O sum[%lu]:cur[%lu], unzip sum[%lu]:cur[%lu]\n",
UT_LIST_GET_LEN(buf_pool->LRU),
UT_LIST_GET_LEN(buf_pool->unzip_LRU),
buf_LRU_stat_sum.io, buf_LRU_stat_cur.io,
buf_LRU_stat_sum.unzip, buf_LRU_stat_cur.unzip);
buf_pool_mutex_exit();
}
......
......@@ -476,6 +476,11 @@ flush:
buf_page_get_zip_size(&block->page),
(void*)block->page.zip.data,
(void*)block);
/* Increment the counter of I/O operations used
for selecting LRU policy. */
buf_LRU_stat_inc_io();
continue;
}
......@@ -505,6 +510,10 @@ flush:
FALSE, buf_block_get_space(block), 0,
buf_block_get_page_no(block), 0, UNIV_PAGE_SIZE,
(void*)block->frame, (void*)block);
/* Increment the counter of I/O operations used
for selecting LRU policy. */
buf_LRU_stat_inc_io();
}
/* Wake possible simulated aio thread to actually post the
......
This diff is collapsed.
......@@ -353,6 +353,9 @@ buf_read_page(
/* Flush pages from the end of the LRU list if necessary */
buf_flush_free_margin();
/* Increment number of I/O operations used for LRU policy. */
buf_LRU_stat_inc_io();
return(count + count2);
}
......@@ -613,6 +616,10 @@ buf_read_ahead_linear(
}
#endif /* UNIV_DEBUG */
/* Read ahead is considered one I/O operation for the purpose of
LRU policy decision. */
buf_LRU_stat_inc_io();
++srv_read_ahead_seq;
return(count);
}
......
......@@ -50,19 +50,6 @@ buf_buddy_free(
ulint size) /* in: block size, up to UNIV_PAGE_SIZE */
__attribute__((nonnull));
/** Number of frames allocated from the buffer pool to the buddy system.
Protected by buf_pool_mutex. */
extern ulint buf_buddy_n_frames;
/** Preferred minimum number of frames allocated from the buffer pool
to the buddy system. Unless this number is exceeded or the buffer
pool is scarce, the LRU algorithm will not free compressed-only pages
in order to satisfy an allocation request. Protected by buf_pool_mutex. */
extern ulint buf_buddy_min_n_frames;
/** Preferred maximum number of frames allocated from the buffer pool
to the buddy system. Unless this number is exceeded, the buddy allocator
will not try to free clean compressed-only pages before falling back
to the LRU algorithm. Protected by buf_pool_mutex. */
extern ulint buf_buddy_max_n_frames;
/** Counts of blocks allocated from the buddy system.
Protected by buf_pool_mutex. */
extern ulint buf_buddy_used[BUF_BUDDY_SIZES + 1];
......
......@@ -645,6 +645,16 @@ buf_page_in_file(
const buf_page_t* bpage) /* in: pointer to control block */
__attribute__((pure));
/*************************************************************************
Determines if a block should be on unzip_LRU list. This is the case
when the control block has a compressed page attached (zip.data is set)
and its state is BUF_BLOCK_FILE_PAGE. */
UNIV_INLINE
ibool
buf_page_belongs_to_unzip_LRU(
/*==========================*/
/* out: TRUE if block belongs
to unzip_LRU */
const buf_page_t* bpage) /* in: pointer to control block;
must satisfy buf_page_in_file()
(checked by a debug assertion) */
__attribute__((pure));
/*************************************************************************
Determine the approximate LRU list position of a block. */
UNIV_INLINE
ulint
......@@ -1042,7 +1052,7 @@ struct buf_page_struct{
UT_LIST_NODE_T(buf_page_t) LRU;
/* node of the LRU list */
#ifdef UNIV_DEBUG
ibool in_LRU_list; /* TRUE of the page is in the LRU list;
ibool in_LRU_list; /* TRUE if the page is in the LRU list;
used in debugging */
#endif /* UNIV_DEBUG */
unsigned old:1; /* TRUE if the block is in the old
......@@ -1079,6 +1089,16 @@ struct buf_block_struct{
be the first field, so that
buf_pool->page_hash can point
to buf_page_t or buf_block_t */
UT_LIST_NODE_T(buf_block_t) unzip_LRU;
/* node of the decompressed LRU list;
a block is in the unzip_LRU list
if page.state == BUF_BLOCK_FILE_PAGE
and page.zip.data != NULL */
#ifdef UNIV_DEBUG
ibool in_unzip_LRU_list;/* TRUE if the page is in the
decompressed LRU list;
used in debugging */
#endif /* UNIV_DEBUG */
byte* frame; /* pointer to buffer frame which
is of size UNIV_PAGE_SIZE, and
aligned to an address divisible by
......@@ -1263,6 +1283,9 @@ struct buf_pool_struct{
on this value; not defined if
LRU_old == NULL */
UT_LIST_BASE_NODE_T(buf_block_t) unzip_LRU;
/* base node of the unzip_LRU list */
/* 4. Fields for the buddy allocator of compressed pages */
UT_LIST_BASE_NODE_T(buf_page_t) zip_clean;
/* unmodified compressed pages */
......
......@@ -235,6 +235,22 @@ buf_page_in_file(
return(FALSE);
}
/*************************************************************************
Tells whether a control block is expected to be a member of the
unzip_LRU list: it must have a compressed page attached and be in
state BUF_BLOCK_FILE_PAGE. */
UNIV_INLINE
ibool
buf_page_belongs_to_unzip_LRU(
/*==========================*/
				/* out: nonzero (TRUE) if the block
				belongs to unzip_LRU, zero otherwise */
	const buf_page_t*	bpage)	/* in: pointer to control block */
{
	/* Only pages that are in a file may be asked about. */
	ut_ad(buf_page_in_file(bpage));

	if (!bpage->zip.data) {
		/* No compressed page is attached to this block. */
		return(0);
	}

	return(buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE);
}
/*************************************************************************
Determine the approximate LRU list position of a block. */
UNIV_INLINE
......
......@@ -13,6 +13,18 @@ Created 11/5/1995 Heikki Tuuri
#include "ut0byte.h"
#include "buf0types.h"
/** Outcome reported by buf_LRU_free_block().
NOTE: success is the zero value, so a result must be compared against
BUF_LRU_FREED explicitly; testing it like a boolean inverts the meaning. */
enum buf_lru_free_block_status {
	/** the block was freed */
	BUF_LRU_FREED = 0,
	/** nothing was freed: the caller asked to remove the
	uncompressed frame, but the control block cannot be
	relocated */
	BUF_LRU_CANNOT_RELOCATE = 1,
	/** nothing was freed, for some other reason */
	BUF_LRU_NOT_FREED = 2
};
/**********************************************************************
Tries to remove LRU flushed blocks from the end of the LRU list and put them
to the free list. This is beneficial for the efficiency of the insert buffer
......@@ -72,19 +84,20 @@ void
buf_LRU_insert_zip_clean(
/*=====================*/
buf_page_t* bpage); /* in: pointer to the block in question */
/**********************************************************************
Try to free a block. */
Try to free a block. If bpage is a descriptor of a compressed-only
page, the descriptor object will be freed as well. If this function
returns BUF_LRU_FREED, it will not temporarily release
buf_pool_mutex. */
UNIV_INTERN
ibool
enum buf_lru_free_block_status
buf_LRU_free_block(
/*===============*/
/* out: TRUE if freed. If bpage is a
descriptor of a compressed-only page,
the descriptor object will be freed
as well. If this function returns FALSE,
it will not temporarily release
buf_pool_mutex. */
buf_page_t* block, /* in: block to be freed */
/* out: BUF_LRU_FREED if freed,
BUF_LRU_CANNOT_RELOCATE or
BUF_LRU_NOT_FREED otherwise. */
buf_page_t* bpage, /* in: block to be freed */
ibool zip, /* in: TRUE if should remove also the
compressed page of an uncompressed page */
ibool* buf_pool_mutex_released);
......@@ -92,19 +105,20 @@ buf_LRU_free_block(
be assigned TRUE if buf_pool_mutex
was temporarily released, or NULL */
/**********************************************************************
Look for a replaceable block from the end of the LRU list and put it to
the free list if found. */
Try to free a replaceable block. */
UNIV_INTERN
ibool
buf_LRU_search_and_free_block(
/*==========================*/
/* out: TRUE if freed */
ulint n_iterations); /* in: how many times this has been called
/* out: TRUE if found and freed */
ulint n_iterations); /* in: how many times this has been called
repeatedly without result: a high value means
that we should search farther; if value is
k < 10, then we only search k/10 * number
of pages in the buffer pool from the end
of the LRU list */
that we should search farther; if
n_iterations < 10, then we search
n_iterations / 10 * buf_pool->curr_size
pages from the end of the LRU list; if
n_iterations < 5, then we will also search
n_iterations / 5 of the unzip_LRU list. */
/**********************************************************************
Returns a free block from the buf_pool. The block is taken off the
free list. If it is empty, returns NULL. */
......@@ -146,6 +160,15 @@ buf_LRU_add_block(
start; if the LRU list is very short, added to
the start regardless of this parameter */
/**********************************************************************
Adds a block to the LRU list of decompressed zip pages
(buf_pool->unzip_LRU). The callers in buf0buf.c add the block only
after block->page.zip.data has been set, so that the invariant
block->in_unzip_LRU_list == buf_page_belongs_to_unzip_LRU(&block->page)
is maintained. */
UNIV_INTERN
void
buf_unzip_LRU_add_block(
/*====================*/
buf_block_t* block, /* in: control block */
ibool old); /* in: TRUE if should be put to the end
of the list, else put to the start */
/**********************************************************************
Moves a block to the start of the LRU list. */
UNIV_INTERN
void
......@@ -159,6 +182,14 @@ void
buf_LRU_make_block_old(
/*===================*/
buf_page_t* bpage); /* in: control block */
/************************************************************************
Update the historical stats that we are collecting for LRU eviction
policy at the end of each interval. As documented on the declarations
of buf_LRU_stat_cur and buf_LRU_stat_sum, this function clears the
current counters and updates the running sum. It takes no arguments
and returns nothing. */
UNIV_INTERN
void
buf_LRU_stat_update(void);
/*=====================*/
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/**************************************************************************
Validates the LRU list. */
......@@ -176,6 +207,35 @@ buf_LRU_print(void);
/*===============*/
#endif /* UNIV_DEBUG_PRINT || UNIV_DEBUG || UNIV_BUF_DEBUG */
/**********************************************************************
The counters below are not statistics 'of' the LRU lists but inputs
'for' the LRU algorithm. I/O operations and page_zip_decompress()
operations are counted, and the counts are used for deciding whether
to evict from buf_pool->unzip_LRU or from buf_pool->LRU. */

/** Statistics for selecting the LRU list for eviction. */
typedef struct buf_LRU_stat_struct
{
	ulint	io;	/**< Counter of buffer pool I/O operations. */
	ulint	unzip;	/**< Counter of page_zip_decompress operations. */
} buf_LRU_stat_t;
/** Current operation counters. Not protected by any mutex.
Cleared by buf_LRU_stat_update(). */
extern buf_LRU_stat_t buf_LRU_stat_cur;
/** Running sum of past values of buf_LRU_stat_cur.
Updated by buf_LRU_stat_update(). Protected by buf_pool_mutex. */
extern buf_LRU_stat_t buf_LRU_stat_sum;
/************************************************************************
Adds one to buf_LRU_stat_cur.io, the current count of I/O operations.
The expansion is parenthesized so that it is a single primary
expression wherever it is used. */
#define buf_LRU_stat_inc_io() (buf_LRU_stat_cur.io++)
/************************************************************************
Adds one to buf_LRU_stat_cur.unzip, the current count of
page_zip_decompress() operations. The expansion is parenthesized so
that it is a single primary expression wherever it is used. */
#define buf_LRU_stat_inc_unzip() (buf_LRU_stat_cur.unzip++)
#ifndef UNIV_NONINL
#include "buf0lru.ic"
#endif
......
......@@ -23,6 +23,7 @@ Created June 2005 by Marko Makela
#include "lock0lock.h"
#include "log0recv.h"
#include "zlib.h"
#include "buf0lru.h"
/** Number of page compressions, indexed by page_zip_des_t::ssize */
UNIV_INTERN ulint page_zip_compress_count[8];
......@@ -2945,6 +2946,9 @@ err_exit:
page_zip_decompress_duration[page_zip->ssize]
+= ut_time_us(NULL) - usec;
/* Update the stat counter for LRU policy. */
buf_LRU_stat_inc_unzip();
return(TRUE);
}
......
......@@ -43,6 +43,7 @@ Created 10/8/1995 Heikki Tuuri
#include "trx0purge.h"
#include "ibuf0ibuf.h"
#include "buf0flu.h"
#include "buf0lru.h"
#include "btr0sea.h"
#include "dict0load.h"
#include "dict0boot.h"
......@@ -2052,6 +2053,10 @@ loop:
srv_refresh_innodb_monitor_stats();
}
/* Update the statistics collected for deciding LRU
eviction policy. */
buf_LRU_stat_update();
/* In case mutex_exit is not a memory barrier, it is
theoretically possible some threads are left waiting though
the semaphore is already released. Wake up those threads: */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment