Commit 716e0769 authored by marko's avatar marko

branches/zip: Replace the constant 3/8 ratio that controls the LRU_old

size with the settable global variable innodb_old_blocks_pct. The
minimum and maximum values are 5 and 95 per cent, respectively. The
default is 100*3/8, in line with the old behavior.

ut_time_ms(): New utility function, to return the current time in
milliseconds. TODO: Is there a more efficient timestamp function, such
as rdtsc divided by a power of two?

buf_LRU_old_threshold_ms: New variable, corresponding to
innodb_old_blocks_time. The value 0 is the default behaviour: no
timeout before making blocks 'new'.

bpage->accessed, bpage->LRU_position, buf_pool->ulint_clock: Remove.

bpage->access_time: New field, replacing bpage->accessed. Protected by
buf_pool_mutex instead of bpage->mutex. Updated when a page is created
or accessed the first time in the buffer pool.

buf_LRU_old_ratio, innobase_old_blocks_pct: New variables,
corresponding to innodb_old_blocks_pct

buf_LRU_old_ratio_update(), innobase_old_blocks_pct_update(): Update
functions for buf_LRU_old_ratio, innobase_old_blocks_pct.

buf_page_peek_if_too_old(): Compare ut_time_ms() to bpage->access_time
if buf_LRU_old_threshold_ms && bpage->old.  Else observe
buf_LRU_old_ratio and bpage->freed_page_clock.

buf_pool_t: Add n_pages_made_young, n_pages_not_made_young,
n_pages_made_young_old, n_pages_not_made_young, for statistics.

buf_print(): Display buf_pool->n_pages_made_young,
buf_pool->n_pages_not_made_young.  This function is only for crash
diagnostics.

buf_print_io(): Display buf_pool->LRU_old_len and quantities derived
from buf_pool->n_pages_made_young, buf_pool->n_pages_not_made_young.
This function is invoked by SHOW ENGINE INNODB STATUS.

rb://129 approved by Heikki Tuuri.  This addresses Bug #45015.
parent 61bb053b
2009-08-27 The InnoDB Team
* buf/buf0buf.c, buf/buf0lru.c, buf/buf0rea.c,
handler/ha_innodb.cc, include/buf0buf.h, include/buf0buf.ic,
include/buf0lru.h, include/ut0ut.h, ut/ut0ut.c:
Make it possible to tune the buffer pool LRU eviction policy to be
more resistant against index scans. Introduce the settable global
variables innodb_old_blocks_pct and innodb_old_blocks_time for
controlling the buffer pool eviction policy. The parameter
innodb_old_blocks_pct (5..95) controls the desired amount of "old"
blocks in the LRU list. The default is 37, corresponding to the
old fixed ratio of 3/8. Each time a block is accessed, it will be
moved to the "new" blocks if its first access was at least
innodb_old_blocks_time milliseconds ago (default 0, meaning every
block). The idea is that in index scans, blocks will be accessed
a few times within innodb_old_blocks_time, and they will remain in
the "old" section of the LRU list. Thus, when
innodb_old_blocks_time is nonzero, blocks retrieved for one-time
index scans will be more likely candidates for eviction than
blocks that are accessed in random patterns.
2009-08-26 The InnoDB Team
* handler/ha_innodb.cc, os/os0file.c:
......
......@@ -966,8 +966,6 @@ buf_pool_init(void)
buf_pool->no_flush[i] = os_event_create(NULL);
}
buf_pool->ulint_clock = 1;
/* 3. Initialize LRU fields
--------------------------- */
/* All fields are initialized by mem_zalloc(). */
......@@ -1470,31 +1468,6 @@ buf_pool_resize(void)
buf_pool_page_hash_rebuild();
}
/********************************************************************//**
Moves the block to the start of the LRU list if there is a danger
that the block would drift out of the buffer pool. */
UNIV_INLINE
void
buf_block_make_young(
/*=================*/
buf_page_t* bpage) /*!< in: block to make younger */
{
ut_ad(!buf_pool_mutex_own());
/* Note that we read freed_page_clock's without holding any mutex:
this is allowed since the result is used only in heuristics */
if (buf_page_peek_if_too_old(bpage)) {
buf_pool_mutex_enter();
/* There has been freeing activity in the LRU list:
best to move to the head of the LRU list */
buf_LRU_make_block_young(bpage);
buf_pool_mutex_exit();
}
}
/********************************************************************//**
Moves a page to the start of the buffer pool LRU list. This high-level
function can be used to prevent an important page from from slipping out of
......@@ -1713,13 +1686,15 @@ buf_page_get_zip(
got_block:
must_read = buf_page_get_io_fix(bpage) == BUF_IO_READ;
buf_pool_mutex_exit();
if (buf_page_peek_if_too_old(bpage)) {
buf_LRU_make_block_young(bpage);
}
buf_page_set_accessed(bpage, TRUE);
buf_page_set_accessed(bpage);
mutex_exit(block_mutex);
buf_pool_mutex_exit();
buf_block_make_young(bpage);
mutex_exit(block_mutex);
#ifdef UNIV_DEBUG_FILE_ACCESSES
ut_a(!bpage->file_page_was_freed);
......@@ -2000,7 +1975,7 @@ buf_page_get_gen(
mtr_t* mtr) /*!< in: mini-transaction */
{
buf_block_t* block;
ibool accessed;
unsigned access_time;
ulint fix_type;
ibool must_read;
......@@ -2243,17 +2218,20 @@ buf_page_get_gen(
UNIV_MEM_ASSERT_RW(&block->page, sizeof block->page);
buf_block_buf_fix_inc(block, file, line);
buf_pool_mutex_exit();
mutex_exit(&block->mutex);
/* Check if this is the first access to the page */
accessed = buf_page_is_accessed(&block->page);
access_time = buf_page_is_accessed(&block->page);
buf_page_set_accessed(&block->page, TRUE);
if (buf_page_peek_if_too_old(&block->page)) {
buf_LRU_make_block_young(&block->page);
}
mutex_exit(&block->mutex);
buf_page_set_accessed(&block->page);
buf_block_make_young(&block->page);
buf_pool_mutex_exit();
#ifdef UNIV_DEBUG_FILE_ACCESSES
ut_a(!block->page.file_page_was_freed);
......@@ -2306,7 +2284,7 @@ buf_page_get_gen(
mtr_memo_push(mtr, block, fix_type);
if (!accessed) {
if (!access_time) {
/* In the case of a first access, try to apply linear
read-ahead */
......@@ -2336,7 +2314,7 @@ buf_page_optimistic_get_func(
ulint line, /*!< in: line where called */
mtr_t* mtr) /*!< in: mini-transaction */
{
ibool accessed;
unsigned access_time;
ibool success;
ulint fix_type;
......@@ -2353,14 +2331,21 @@ buf_page_optimistic_get_func(
}
buf_block_buf_fix_inc(block, file, line);
accessed = buf_page_is_accessed(&block->page);
buf_page_set_accessed(&block->page, TRUE);
mutex_exit(&block->mutex);
buf_block_make_young(&block->page);
buf_pool_mutex_enter();
/* Check if this is the first access to the page */
access_time = buf_page_is_accessed(&block->page);
if (buf_page_peek_if_too_old(&block->page)) {
buf_LRU_make_block_young(&block->page);
}
buf_page_set_accessed(&block->page);
buf_pool_mutex_exit();
ut_ad(!ibuf_inside()
|| ibuf_page(buf_block_get_space(block),
......@@ -2412,7 +2397,7 @@ buf_page_optimistic_get_func(
#ifdef UNIV_DEBUG_FILE_ACCESSES
ut_a(block->page.file_page_was_freed == FALSE);
#endif
if (UNIV_UNLIKELY(!accessed)) {
if (UNIV_UNLIKELY(!access_time)) {
/* In the case of a first access, try to apply linear
read-ahead */
......@@ -2473,10 +2458,16 @@ buf_page_get_known_nowait(
mutex_exit(&block->mutex);
if (mode == BUF_MAKE_YOUNG) {
buf_block_make_young(&block->page);
buf_pool_mutex_enter();
if (mode == BUF_MAKE_YOUNG && buf_page_peek_if_too_old(&block->page)) {
buf_LRU_make_block_young(&block->page);
}
buf_page_set_accessed(&block->page);
buf_pool_mutex_exit();
ut_ad(!ibuf_inside() || (mode == BUF_KEEP_OLD));
if (rw_latch == RW_S_LATCH) {
......@@ -2608,10 +2599,10 @@ buf_page_init_low(
buf_page_t* bpage) /*!< in: block to init */
{
bpage->flush_type = BUF_FLUSH_LRU;
bpage->accessed = FALSE;
bpage->io_fix = BUF_IO_NONE;
bpage->buf_fix_count = 0;
bpage->freed_page_clock = 0;
bpage->access_time = 0;
bpage->newest_modification = 0;
bpage->oldest_modification = 0;
HASH_INVALIDATE(bpage, hash);
......@@ -2990,12 +2981,12 @@ buf_page_create(
rw_lock_x_unlock(&block->lock);
}
buf_page_set_accessed(&block->page);
buf_pool_mutex_exit();
mtr_memo_push(mtr, block, MTR_MEMO_BUF_FIX);
buf_page_set_accessed(&block->page, TRUE);
mutex_exit(&block->mutex);
/* Delete possible entries for the page from the insert buffer:
......@@ -3528,6 +3519,7 @@ buf_print(void)
"n pending decompressions %lu\n"
"n pending reads %lu\n"
"n pending flush LRU %lu list %lu single page %lu\n"
"pages made young %lu, not young %lu\n"
"pages read %lu, created %lu, written %lu\n",
(ulong) size,
(ulong) UT_LIST_GET_LEN(buf_pool->LRU),
......@@ -3538,6 +3530,8 @@ buf_print(void)
(ulong) buf_pool->n_flush[BUF_FLUSH_LRU],
(ulong) buf_pool->n_flush[BUF_FLUSH_LIST],
(ulong) buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE],
(ulong) buf_pool->n_pages_made_young,
(ulong) buf_pool->n_pages_not_made_young,
(ulong) buf_pool->n_pages_read, buf_pool->n_pages_created,
(ulong) buf_pool->n_pages_written);
......@@ -3744,10 +3738,9 @@ buf_print_io(
{
time_t current_time;
double time_elapsed;
ulint size;
ulint n_gets_diff;
ut_ad(buf_pool);
size = buf_pool->curr_size;
buf_pool_mutex_enter();
......@@ -3755,12 +3748,14 @@ buf_print_io(
"Buffer pool size %lu\n"
"Free buffers %lu\n"
"Database pages %lu\n"
"Old database pages %lu\n"
"Modified db pages %lu\n"
"Pending reads %lu\n"
"Pending writes: LRU %lu, flush list %lu, single page %lu\n",
(ulong) size,
(ulong) buf_pool->curr_size,
(ulong) UT_LIST_GET_LEN(buf_pool->free),
(ulong) UT_LIST_GET_LEN(buf_pool->LRU),
(ulong) buf_pool->LRU_old_len,
(ulong) UT_LIST_GET_LEN(buf_pool->flush_list),
(ulong) buf_pool->n_pend_reads,
(ulong) buf_pool->n_flush[BUF_FLUSH_LRU]
......@@ -3775,8 +3770,18 @@ buf_print_io(
buf_pool->last_printout_time = current_time;
fprintf(file,
"Pages made young %lu, not young %lu\n"
"%.2f youngs/s, %.2f non-youngs/s\n"
"Pages read %lu, created %lu, written %lu\n"
"%.2f reads/s, %.2f creates/s, %.2f writes/s\n",
(ulong) buf_pool->n_pages_made_young,
(ulong) buf_pool->n_pages_not_made_young,
(buf_pool->n_pages_made_young
- buf_pool->n_pages_made_young_old)
/ time_elapsed,
(buf_pool->n_pages_not_made_young
- buf_pool->n_pages_not_made_young_old)
/ time_elapsed,
(ulong) buf_pool->n_pages_read,
(ulong) buf_pool->n_pages_created,
(ulong) buf_pool->n_pages_written,
......@@ -3787,19 +3792,33 @@ buf_print_io(
(buf_pool->n_pages_written - buf_pool->n_pages_written_old)
/ time_elapsed);
if (buf_pool->n_page_gets > buf_pool->n_page_gets_old) {
fprintf(file, "Buffer pool hit rate %lu / 1000\n",
n_gets_diff = buf_pool->n_page_gets - buf_pool->n_page_gets_old;
if (n_gets_diff) {
fprintf(file,
"Buffer pool hit rate %lu / 1000,"
" young-making rate %lu / 1000 not %lu / 1000\n",
(ulong)
(1000 - ((1000 * (buf_pool->n_pages_read
- buf_pool->n_pages_read_old))
/ (buf_pool->n_page_gets
- buf_pool->n_page_gets_old))));
/ n_gets_diff)),
(ulong)
(1000 * (buf_pool->n_pages_made_young
- buf_pool->n_pages_made_young_old)
/ n_gets_diff),
(ulong)
(1000 * (buf_pool->n_pages_not_made_young
- buf_pool->n_pages_not_made_young_old)
/ n_gets_diff));
} else {
fputs("No buffer pool page gets since the last printout\n",
file);
}
buf_pool->n_page_gets_old = buf_pool->n_page_gets;
buf_pool->n_pages_made_young_old = buf_pool->n_pages_made_young;
buf_pool->n_pages_not_made_young_old
= buf_pool->n_pages_not_made_young;
buf_pool->n_pages_read_old = buf_pool->n_pages_read;
buf_pool->n_pages_created_old = buf_pool->n_pages_created;
buf_pool->n_pages_written_old = buf_pool->n_pages_written;
......
This diff is collapsed.
......@@ -164,165 +164,6 @@ buf_read_page_low(
return(1);
}
/********************************************************************//**
Applies a random read-ahead in buf_pool if there are at least a threshold
value of accessed pages from the random read-ahead area. Does not read any
page, not even the one at the position (space, offset), if the read-ahead
mechanism is not activated. NOTE 1: the calling thread may own latches on
pages: to avoid deadlocks this function must be written such that it cannot
end up waiting for these latches! NOTE 2: the calling thread must want
access to the page given: this rule is set to prevent unintended read-aheads
performed by ibuf routines, a situation which could result in a deadlock if
the OS does not support asynchronous i/o.
@return number of page read requests issued; NOTE that if we read ibuf
pages, it may happen that the page at the given page number does not
get read even if we return a positive value! */
static
ulint
buf_read_ahead_random(
/*==================*/
ulint space, /*!< in: space id */
ulint zip_size,/*!< in: compressed page size in bytes, or 0 */
ulint offset) /*!< in: page number of a page which the current thread
wants to access */
{
ib_int64_t tablespace_version;
ulint recent_blocks = 0;
ulint count;
ulint LRU_recent_limit;
ulint ibuf_mode;
ulint low, high;
ulint err;
ulint i;
ulint buf_read_ahead_random_area;
/* We have currently disabled random readahead */
return(0);
if (srv_startup_is_before_trx_rollback_phase) {
/* No read-ahead to avoid thread deadlocks */
return(0);
}
if (ibuf_bitmap_page(zip_size, offset)
|| trx_sys_hdr_page(space, offset)) {
/* If it is an ibuf bitmap page or trx sys hdr, we do
no read-ahead, as that could break the ibuf page access
order */
return(0);
}
/* Remember the tablespace version before we ask te tablespace size
below: if DISCARD + IMPORT changes the actual .ibd file meanwhile, we
do not try to read outside the bounds of the tablespace! */
tablespace_version = fil_space_get_version(space);
buf_read_ahead_random_area = BUF_READ_AHEAD_RANDOM_AREA;
low = (offset / buf_read_ahead_random_area)
* buf_read_ahead_random_area;
high = (offset / buf_read_ahead_random_area + 1)
* buf_read_ahead_random_area;
if (high > fil_space_get_size(space)) {
high = fil_space_get_size(space);
}
/* Get the minimum LRU_position field value for an initial segment
of the LRU list, to determine which blocks have recently been added
to the start of the list. */
LRU_recent_limit = buf_LRU_get_recent_limit();
buf_pool_mutex_enter();
if (buf_pool->n_pend_reads
> buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) {
buf_pool_mutex_exit();
return(0);
}
/* Count how many blocks in the area have been recently accessed,
that is, reside near the start of the LRU list. */
for (i = low; i < high; i++) {
const buf_page_t* bpage = buf_page_hash_get(space, i);
if (bpage
&& buf_page_is_accessed(bpage)
&& (buf_page_get_LRU_position(bpage) > LRU_recent_limit)) {
recent_blocks++;
if (recent_blocks >= BUF_READ_AHEAD_RANDOM_THRESHOLD) {
buf_pool_mutex_exit();
goto read_ahead;
}
}
}
buf_pool_mutex_exit();
/* Do nothing */
return(0);
read_ahead:
/* Read all the suitable blocks within the area */
if (ibuf_inside()) {
ibuf_mode = BUF_READ_IBUF_PAGES_ONLY;
} else {
ibuf_mode = BUF_READ_ANY_PAGE;
}
count = 0;
for (i = low; i < high; i++) {
/* It is only sensible to do read-ahead in the non-sync aio
mode: hence FALSE as the first parameter */
if (!ibuf_bitmap_page(zip_size, i)) {
count += buf_read_page_low(
&err, FALSE,
ibuf_mode | OS_AIO_SIMULATED_WAKE_LATER,
space, zip_size, FALSE,
tablespace_version, i);
if (err == DB_TABLESPACE_DELETED) {
ut_print_timestamp(stderr);
fprintf(stderr,
" InnoDB: Warning: in random"
" readahead trying to access\n"
"InnoDB: tablespace %lu page %lu,\n"
"InnoDB: but the tablespace does not"
" exist or is just being dropped.\n",
(ulong) space, (ulong) i);
}
}
}
/* In simulated aio we wake the aio handler threads only after
queuing all aio requests, in native aio the following call does
nothing: */
os_aio_simulated_wake_handler_threads();
#ifdef UNIV_DEBUG
if (buf_debug_prints && (count > 0)) {
fprintf(stderr,
"Random read-ahead space %lu offset %lu pages %lu\n",
(ulong) space, (ulong) offset,
(ulong) count);
}
#endif /* UNIV_DEBUG */
++srv_read_ahead_rnd;
return(count);
}
/********************************************************************//**
High-level function which reads a page asynchronously from a file to the
buffer buf_pool if it is not already there. Sets the io_fix flag and sets
......@@ -341,20 +182,17 @@ buf_read_page(
{
ib_int64_t tablespace_version;
ulint count;
ulint count2;
ulint err;
tablespace_version = fil_space_get_version(space);
count = buf_read_ahead_random(space, zip_size, offset);
/* We do the i/o in the synchronous aio mode to save thread
switches: hence TRUE */
count2 = buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE, space,
zip_size, FALSE,
tablespace_version, offset);
srv_buf_pool_reads+= count2;
count = buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE, space,
zip_size, FALSE,
tablespace_version, offset);
srv_buf_pool_reads += count;
if (err == DB_TABLESPACE_DELETED) {
ut_print_timestamp(stderr);
fprintf(stderr,
......@@ -371,7 +209,7 @@ buf_read_page(
/* Increment number of I/O operations used for LRU policy. */
buf_LRU_stat_inc_io();
return(count + count2);
return(count);
}
/********************************************************************//**
......@@ -498,9 +336,17 @@ buf_read_ahead_linear(
fail_count++;
} else if (pred_bpage) {
int res = (ut_ulint_cmp(
buf_page_get_LRU_position(bpage),
buf_page_get_LRU_position(pred_bpage)));
/* Note that buf_page_is_accessed() returns
the time of the first access. If some blocks
of the extent existed in the buffer pool at
the time of a linear access pattern, the first
access times may be nonmonotonic, even though
the latest access times were linear. The
threshold (srv_read_ahead_factor) should help
a little against this. */
int res = ut_ulint_cmp(
buf_page_is_accessed(bpage),
buf_page_is_accessed(pred_bpage));
/* Accesses not in the right order */
if (res != 0 && res != asc_or_desc) {
fail_count++;
......
......@@ -72,6 +72,7 @@ with this program; if not, write to the Free Software Foundation, Inc.,
/* Include necessary InnoDB headers */
extern "C" {
#include "univ.i"
#include "buf0lru.h"
#include "btr0sea.h"
#include "os0file.h"
#include "os0thread.h"
......@@ -157,6 +158,10 @@ static ulong innobase_write_io_threads;
static long long innobase_buffer_pool_size, innobase_log_file_size;
/** Percentage of the buffer pool to reserve for 'old' blocks.
Connected to buf_LRU_old_ratio. */
static uint innobase_old_blocks_pct;
/* The default values for the following char* start-up parameters
are determined in innobase_init below: */
......@@ -2208,6 +2213,9 @@ innobase_init(
ut_a(0 == strcmp(my_charset_latin1.name, "latin1_swedish_ci"));
srv_latin1_ordering = my_charset_latin1.sort_order;
innobase_old_blocks_pct = buf_LRU_old_ratio_update(
innobase_old_blocks_pct, FALSE);
innobase_commit_concurrency_init_default();
/* Since we in this module access directly the fields of a trx
......@@ -9614,6 +9622,25 @@ innodb_adaptive_hash_index_update(
}
}
/****************************************************************//**
Update the system variable innodb_old_blocks_pct using the "saved"
value. This function is registered as a callback with MySQL. */
static
void
innodb_old_blocks_pct_update(
/*=========================*/
THD* thd, /*!< in: thread handle */
struct st_mysql_sys_var* var, /*!< in: pointer to
system variable */
void* var_ptr,/*!< out: where the
formal string goes */
const void* save) /*!< in: immediate result
from check function */
{
innobase_old_blocks_pct = buf_LRU_old_ratio_update(
*static_cast<const uint*>(save), TRUE);
}
/*************************************************************//**
Check if it is a valid value of innodb_change_buffering. This function is
registered as a callback with MySQL.
......@@ -9890,6 +9917,18 @@ static MYSQL_SYSVAR_LONG(mirrored_log_groups, innobase_mirrored_log_groups,
"Number of identical copies of log groups we keep for the database. Currently this should be set to 1.",
NULL, NULL, 1, 1, 10, 0);
static MYSQL_SYSVAR_UINT(old_blocks_pct, innobase_old_blocks_pct,
PLUGIN_VAR_RQCMDARG,
"Percentage of the buffer pool to reserve for 'old' blocks.",
NULL, innodb_old_blocks_pct_update, 100 * 3 / 8, 5, 95, 0);
static MYSQL_SYSVAR_UINT(old_blocks_time, buf_LRU_old_threshold_ms,
PLUGIN_VAR_RQCMDARG,
"Move blocks to the 'new' end of the buffer pool if the first access"
" was at least this many milliseconds ago."
" The timeout is disabled if 0 (the default).",
NULL, NULL, 0, 0, UINT_MAX32, 0);
static MYSQL_SYSVAR_LONG(open_files, innobase_open_files,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"How many files at the maximum InnoDB keeps open at the same time.",
......@@ -9988,6 +10027,8 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(adaptive_flushing),
MYSQL_SYSVAR(max_purge_lag),
MYSQL_SYSVAR(mirrored_log_groups),
MYSQL_SYSVAR(old_blocks_pct),
MYSQL_SYSVAR(old_blocks_time),
MYSQL_SYSVAR(open_files),
MYSQL_SYSVAR(rollback_on_timeout),
MYSQL_SYSVAR(stats_on_metadata),
......
......@@ -707,15 +707,6 @@ buf_page_belongs_to_unzip_LRU(
/*==========================*/
const buf_page_t* bpage) /*!< in: pointer to control block */
__attribute__((pure));
/*********************************************************************//**
Determine the approximate LRU list position of a block.
@return LRU list position */
UNIV_INLINE
ulint
buf_page_get_LRU_position(
/*======================*/
const buf_page_t* bpage) /*!< in: control block */
__attribute__((pure));
/*********************************************************************//**
Gets the mutex of a block.
......@@ -816,22 +807,22 @@ buf_page_set_old(
buf_page_t* bpage, /*!< in/out: control block */
ibool old); /*!< in: old */
/*********************************************************************//**
Determine if a block has been accessed in the buffer pool.
@return TRUE if accessed */
Determine the time of last access a block in the buffer pool.
@return ut_time_ms() at the time of last access, 0 if not accessed */
UNIV_INLINE
ibool
unsigned
buf_page_is_accessed(
/*=================*/
const buf_page_t* bpage) /*!< in: control block */
__attribute__((pure));
__attribute__((nonnull, pure));
/*********************************************************************//**
Flag a block accessed. */
UNIV_INLINE
void
buf_page_set_accessed(
/*==================*/
buf_page_t* bpage, /*!< in/out: control block */
ibool accessed); /*!< in: accessed */
buf_page_t* bpage) /*!< in/out: control block */
__attribute__((nonnull));
/*********************************************************************//**
Gets the buf_block_t handle of a buffered file block if an uncompressed
page frame exists, or NULL.
......@@ -1017,14 +1008,6 @@ buf_block_hash_get(
/*===============*/
ulint space, /*!< in: space id */
ulint offset);/*!< in: offset of the page within space */
/*******************************************************************//**
Increments the pool clock by one and returns its new value. Remember that
in the 32 bit version the clock wraps around at 4 billion!
@return new clock value */
UNIV_INLINE
ulint
buf_pool_clock_tic(void);
/*====================*/
/*********************************************************************//**
Gets the current length of the free list of buffer blocks.
@return length of the free list */
......@@ -1064,16 +1047,10 @@ struct buf_page_struct{
flushed to disk, this tells the
flush_type.
@see enum buf_flush */
unsigned accessed:1; /*!< TRUE if the page has been accessed
while in the buffer pool: read-ahead
may read in pages which have not been
accessed yet; a thread is allowed to
read this for heuristic purposes
without holding any mutex or latch */
unsigned io_fix:2; /*!< type of pending I/O operation;
also protected by buf_pool_mutex
@see enum buf_io_fix */
unsigned buf_fix_count:24;/*!< count of how manyfold this block
unsigned buf_fix_count:25;/*!< count of how manyfold this block
is currently bufferfixed */
/* @} */
#endif /* !UNIV_HOTBACKUP */
......@@ -1152,17 +1129,7 @@ struct buf_page_struct{
#endif /* UNIV_DEBUG */
unsigned old:1; /*!< TRUE if the block is in the old
blocks in the LRU list */
unsigned LRU_position:31;/*!< value which monotonically
decreases (or may stay
constant if old==TRUE) toward
the end of the LRU list, if
buf_pool->ulint_clock has not
wrapped around: NOTE that this
value can only be used in
heuristic algorithms, because
of the possibility of a
wrap-around! */
unsigned freed_page_clock:32;/*!< the value of
unsigned freed_page_clock:31;/*!< the value of
buf_pool->freed_page_clock
when this block was the last
time put to the head of the
......@@ -1170,6 +1137,9 @@ struct buf_page_struct{
to read this for heuristic
purposes without holding any
mutex or latch */
unsigned access_time:32; /*!< time of first access, or
0 if the block was never accessed
in the buffer pool */
/* @} */
# ifdef UNIV_DEBUG_FILE_ACCESSES
ibool file_page_was_freed;
......@@ -1338,8 +1308,17 @@ struct buf_pool_struct{
ulint n_pend_reads; /*!< number of pending read operations */
ulint n_pend_unzip; /*!< number of pending decompressions */
time_t last_printout_time; /*!< when buf_print was last time
time_t last_printout_time;
/*!< when buf_print_io was last time
called */
ulint n_pages_made_young;
/*!< number of pages made young, in
calls to buf_LRU_make_block_young() */
ulint n_pages_not_made_young;
/*!< number of pages not made
young because the first access
was not long enough ago, in
buf_page_peek_if_too_old() */
ulint n_pages_read; /*!< number read operations */
ulint n_pages_written;/*!< number write operations */
ulint n_pages_created;/*!< number of pages created
......@@ -1350,14 +1329,24 @@ struct buf_pool_struct{
counted as page gets; this field
is NOT protected by the buffer
pool mutex */
ulint n_page_gets_old;/*!< n_page_gets when buf_print was
last time called: used to calculate
ulint n_page_gets_old;/*!< n_page_gets when buf_print_io was
called last time: used to calculate
hit rate */
ulint n_pages_read_old;/*!< n_pages_read when buf_print was
last time called */
ulint n_pages_written_old;/*!< number write operations */
ulint n_pages_created_old;/*!< number of pages created in
the pool with no read */
ulint n_pages_made_young_old;
/*!< n_pages_made_young when
buf_print_io was called last time */
ulint n_pages_not_made_young_old;
/*!< n_pages_not_made_young when
buf_print_io was called last time */
ulint n_pages_read_old;
/*!< n_pages_read when buf_print_io
was called last time */
ulint n_pages_written_old;
/*!< n_pages_written when buf_print_io
was called last time */
ulint n_pages_created_old;
/*!< n_pages_created when buf_print_io
was called last time */
/* @} */
/** @name Page flushing algorithm fields */
/* @{ */
......@@ -1375,10 +1364,6 @@ struct buf_pool_struct{
/*!< this is in the set state
when there is no flush batch
of the given type running */
ulint ulint_clock; /*!< a sequence number used to count
time. NOTE! This counter wraps
around at 4 billion (if ulint ==
32 bits)! */
ulint freed_page_clock;/*!< a sequence number used
to count the number of buffer
blocks removed from the end of
......@@ -1402,9 +1387,11 @@ struct buf_pool_struct{
block list */
UT_LIST_BASE_NODE_T(buf_page_t) LRU;
/*!< base node of the LRU list */
buf_page_t* LRU_old; /*!< pointer to the about 3/8 oldest
blocks in the LRU list; NULL if LRU
length less than BUF_LRU_OLD_MIN_LEN;
buf_page_t* LRU_old; /*!< pointer to the about
buf_LRU_old_ratio/BUF_LRU_OLD_RATIO_DIV
oldest blocks in the LRU list;
NULL if LRU length less than
BUF_LRU_OLD_MIN_LEN;
NOTE: when LRU_old != NULL, its length
should always equal LRU_old_len */
ulint LRU_old_len; /*!< length of the LRU list from
......
......@@ -72,9 +72,24 @@ buf_page_peek_if_too_old(
/*=====================*/
const buf_page_t* bpage) /*!< in: block to make younger */
{
return(buf_pool->freed_page_clock
>= buf_page_get_freed_page_clock(bpage)
+ 1 + (buf_pool->curr_size / 4));
if (buf_LRU_old_threshold_ms && bpage->old) {
unsigned access_time = buf_page_is_accessed(bpage);
if (access_time && ut_time_ms() - access_time
>= buf_LRU_old_threshold_ms) {
return(TRUE);
}
buf_pool->n_pages_not_made_young++;
return(FALSE);
} else {
/* FIXME: bpage->freed_page_clock is 31 bits */
return((buf_pool->freed_page_clock & ~(1 << 31))
> bpage->freed_page_clock
+ (buf_pool->curr_size
* (BUF_LRU_OLD_RATIO_DIV - buf_LRU_old_ratio)
/ (BUF_LRU_OLD_RATIO_DIV * 4)));
}
}
/*********************************************************************//**
......@@ -118,22 +133,6 @@ buf_pool_get_oldest_modification(void)
return(lsn);
}
/*******************************************************************//**
Increments the buf_pool clock by one and returns its new value. Remember
that in the 32 bit version the clock wraps around at 4 billion!
@return new clock value */
UNIV_INLINE
ulint
buf_pool_clock_tic(void)
/*====================*/
{
ut_ad(buf_pool_mutex_own());
buf_pool->ulint_clock++;
return(buf_pool->ulint_clock);
}
#endif /* !UNIV_HOTBACKUP */
/*********************************************************************//**
......@@ -279,21 +278,6 @@ buf_page_belongs_to_unzip_LRU(
&& buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE);
}
/*********************************************************************//**
Determine the approximate LRU list position of a block.
@return LRU list position */
UNIV_INLINE
ulint
buf_page_get_LRU_position(
/*======================*/
const buf_page_t* bpage) /*!< in: control block */
{
ut_ad(buf_page_in_file(bpage));
ut_ad(buf_pool_mutex_own());
return(bpage->LRU_position);
}
/*********************************************************************//**
Gets the mutex of a block.
@return pointer to mutex protecting bpage */
......@@ -487,17 +471,17 @@ buf_page_set_old(
}
/*********************************************************************//**
Determine if a block has been accessed in the buffer pool.
@return TRUE if accessed */
Determine the time of last access a block in the buffer pool.
@return ut_time_ms() at the time of last access, 0 if not accessed */
UNIV_INLINE
ibool
unsigned
buf_page_is_accessed(
/*=================*/
const buf_page_t* bpage) /*!< in: control block */
{
ut_ad(buf_page_in_file(bpage));
return(bpage->accessed);
return(bpage->access_time);
}
/*********************************************************************//**
......@@ -506,13 +490,15 @@ UNIV_INLINE
void
buf_page_set_accessed(
/*==================*/
buf_page_t* bpage, /*!< in/out: control block */
ibool accessed) /*!< in: accessed */
buf_page_t* bpage) /*!< in/out: control block */
{
ut_a(buf_page_in_file(bpage));
ut_ad(mutex_own(buf_page_get_mutex(bpage)));
ut_ad(buf_pool_mutex_own());
bpage->accessed = accessed;
if (!bpage->access_time) {
/* Make this the time of the first access. */
bpage->access_time = ut_time_ms();
}
}
/*********************************************************************//**
......
......@@ -69,7 +69,7 @@ These are low-level functions
#########################################################################*/
/** Minimum LRU list length for which the LRU_old pointer is defined */
#define BUF_LRU_OLD_MIN_LEN 80
#define BUF_LRU_OLD_MIN_LEN 512 /* 8 megabytes of 16k pages */
/** Maximum LRU list search length in buf_flush_LRU_recommendation() */
#define BUF_LRU_FREE_SEARCH_LEN (5 + 2 * BUF_READ_AHEAD_AREA)
......@@ -84,15 +84,6 @@ void
buf_LRU_invalidate_tablespace(
/*==========================*/
ulint id); /*!< in: space id */
/******************************************************************//**
Gets the minimum LRU_position field for the blocks in an initial segment
(determined by BUF_LRU_INITIAL_RATIO) of the LRU list. The limit is not
guaranteed to be precise, because the ulint_clock may wrap around.
@return the limit; zero if could not determine it */
UNIV_INTERN
ulint
buf_LRU_get_recent_limit(void);
/*==========================*/
/********************************************************************//**
Insert a compressed block into buf_pool->zip_clean in the LRU order. */
UNIV_INTERN
......@@ -201,6 +192,18 @@ void
buf_LRU_make_block_old(
/*===================*/
buf_page_t* bpage); /*!< in: control block */
/**********************************************************************//**
Updates buf_LRU_old_ratio.
@return updated old_pct */
UNIV_INTERN
uint
buf_LRU_old_ratio_update(
/*=====================*/
uint old_pct,/*!< in: Reserve this percentage of
the buffer pool for "old" blocks. */
ibool adjust);/*!< in: TRUE=adjust the LRU list;
FALSE=just assign buf_LRU_old_ratio
during the initialization of InnoDB */
/********************************************************************//**
Update the historical stats that we are collecting for LRU eviction
policy at the end of each interval. */
......@@ -227,6 +230,35 @@ buf_LRU_print(void);
/*===============*/
#endif /* UNIV_DEBUG_PRINT || UNIV_DEBUG || UNIV_BUF_DEBUG */
/** @name Heuristics for detecting index scan @{ */
/** Reserve this much/BUF_LRU_OLD_RATIO_DIV of the buffer pool for
"old" blocks. Protected by buf_pool_mutex. */
extern uint buf_LRU_old_ratio;
/** The denominator of buf_LRU_old_ratio. */
#define BUF_LRU_OLD_RATIO_DIV 1024
/** Maximum value of buf_LRU_old_ratio.
@see buf_LRU_old_adjust_len
@see buf_LRU_old_ratio_update */
#define BUF_LRU_OLD_RATIO_MAX BUF_LRU_OLD_RATIO_DIV
/** Minimum value of buf_LRU_old_ratio.
@see buf_LRU_old_adjust_len
@see buf_LRU_old_ratio_update
The minimum must exceed
(BUF_LRU_OLD_TOLERANCE + 5) * BUF_LRU_OLD_RATIO_DIV / BUF_LRU_OLD_MIN_LEN. */
#define BUF_LRU_OLD_RATIO_MIN 51
#if BUF_LRU_OLD_RATIO_MIN >= BUF_LRU_OLD_RATIO_MAX
# error "BUF_LRU_OLD_RATIO_MIN >= BUF_LRU_OLD_RATIO_MAX"
#endif
#if BUF_LRU_OLD_RATIO_MAX > BUF_LRU_OLD_RATIO_DIV
# error "BUF_LRU_OLD_RATIO_MAX > BUF_LRU_OLD_RATIO_DIV"
#endif
/** Move blocks to "new" LRU list only if the first access was at
least this many milliseconds ago. Not protected by any mutex or latch. */
extern uint buf_LRU_old_threshold_ms;
/* @} */
/** @brief Statistics for selecting the LRU list for eviction.
These statistics are not 'of' LRU but 'for' LRU. We keep count of I/O
......
......@@ -239,6 +239,15 @@ ullint
ut_time_us(
/*=======*/
ullint* tloc); /*!< out: us since epoch, if non-NULL */
/**********************************************************//**
Returns the number of milliseconds since some epoch. The
value may wrap around. It should only be used for heuristic
purposes.
@return ms since epoch */
UNIV_INTERN
uint
ut_time_ms(void);
/*============*/
/**********************************************************//**
Returns the difference of two times in seconds.
......
......@@ -199,6 +199,23 @@ ut_time_us(
return(us);
}
/**********************************************************//**
Returns the number of milliseconds since some epoch. The
value may wrap around. It should only be used for heuristic
purposes.
@return ms since epoch */
UNIV_INTERN
uint
ut_time_ms(void)
/*============*/
{
struct timeval tv;
ut_gettimeofday(&tv, NULL);
return((uint) tv.tv_sec * 1000 + tv.tv_usec / 1000);
}
/**********************************************************//**
Returns the difference of two times in seconds.
@return time2 - time1 expressed in seconds */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment