Commit e60e6505 authored by Inaam Rana's avatar Inaam Rana

Bug 12635227 - 61188: DROP TABLE EXTREMELY SLOW

approved by: Marko
rb://681

Coalescing of free buf_page_t descriptors can prove to be one severe
bottleneck in performance of compression. One such workload where it
hurts badly is DROP TABLE. This patch removes buf_page_t allocations
from buf_buddy and uses ut_malloc instead.
In order to further reduce overhead of colaescing we no longer attempt
to coalesce a block if the corresponding free_list is less than 16 in
size.
parent 7de029da
2011-06-16 The InnoDB Team
* btr/btr0cur.c, buf/buf0buddy.c, buf/buf0buf.c, buf/buf0lru.c,
include/buf0buddy.h, include/buf0buddy.ic, include/buf0buf.h,
include/buf0buf.ic, include/buf0lru.h, include/buf0types.h:
Fix Bug#61188 DROP TABLE extremely slow
2011-06-16 The InnoDB Team
* buf/buf0buddy.c, buf/buf0buf.c, buf/buf0flu.c, buf/buf0lru.c,
......
......@@ -3864,7 +3864,7 @@ btr_blob_free(
&& buf_block_get_space(block) == space
&& buf_block_get_page_no(block) == page_no) {
if (buf_LRU_free_block(&block->page, all) != BUF_LRU_FREED
if (!buf_LRU_free_block(&block->page, all)
&& all && block->page.zip.data) {
/* Attempt to deallocate the uncompressed page
if the whole block cannot be deallocted. */
......
This diff is collapsed.
......@@ -1358,7 +1358,7 @@ err_exit:
mutex_enter(block_mutex);
/* Discard the uncompressed page frame if possible. */
if (buf_LRU_free_block(bpage, FALSE) == BUF_LRU_FREED) {
if (buf_LRU_free_block(bpage, FALSE)) {
mutex_exit(block_mutex);
goto lookup;
......@@ -1699,13 +1699,8 @@ loop:
if (block) {
/* If the guess is a compressed page descriptor that
has been allocated by buf_buddy_alloc(), it may have
been invalidated by buf_buddy_relocate(). In that
case, block could point to something that happens to
contain the expected bits in block->page. Similarly,
the guess may be pointing to a buffer pool chunk that
has been released when resizing the buffer pool. */
has been allocated by buf_page_alloc_descriptor(),
it may have been freed by buf_relocate(). */
if (!buf_block_is_uncompressed(block)
|| offset != block->page.offset
|| space != block->page.space
......@@ -1889,11 +1884,10 @@ wait_until_unfixed:
mutex_exit(&buf_pool_zip_mutex);
buf_pool->n_pend_unzip++;
bpage->state = BUF_BLOCK_ZIP_FREE;
buf_buddy_free(bpage, sizeof *bpage);
buf_pool_mutex_exit();
buf_page_free_descriptor(bpage);
/* Decompress the page and apply buffered operations
while not holding buf_pool_mutex or block->mutex. */
success = buf_zip_decompress(block, srv_use_checksums);
......@@ -1937,7 +1931,7 @@ wait_until_unfixed:
/* Try to evict the block from the buffer pool, to use the
insert buffer as much as possible. */
if (buf_LRU_free_block(&block->page, TRUE) == BUF_LRU_FREED) {
if (buf_LRU_free_block(&block->page, TRUE)) {
buf_pool_mutex_exit();
mutex_exit(&block->mutex);
fprintf(stderr,
......@@ -2551,17 +2545,12 @@ err_exit:
mutex_exit(&block->mutex);
} else {
/* Defer buf_buddy_alloc() until after the block has
been found not to exist. The buf_buddy_alloc() and
buf_buddy_free() calls may be expensive because of
buf_buddy_relocate(). */
/* The compressed page must be allocated before the
control block (bpage), in order to avoid the
invocation of buf_buddy_relocate_block() on
uninitialized data. */
data = buf_buddy_alloc(zip_size, &lru);
bpage = buf_buddy_alloc(sizeof *bpage, &lru);
/* If buf_buddy_alloc() allocated storage from the LRU list,
it released and reacquired buf_pool_mutex. Thus, we must
......@@ -2569,15 +2558,13 @@ err_exit:
if (UNIV_UNLIKELY(lru)
&& UNIV_LIKELY_NULL(buf_page_hash_get(space, offset))) {
/* The block was added by some other thread. */
bpage->state = BUF_BLOCK_ZIP_FREE;
buf_buddy_free(bpage, sizeof *bpage);
buf_buddy_free(data, zip_size);
bpage = NULL;
goto func_exit;
}
bpage = buf_page_alloc_descriptor();
page_zip_des_init(&bpage->zip);
page_zip_set_size(&bpage->zip, zip_size);
bpage->zip.data = data;
......
......@@ -355,7 +355,7 @@ scan_again:
while (bpage != NULL) {
buf_page_t* prev_bpage;
ibool prev_bpage_buf_fix = FALSE;
mutex_t* block_mutex = NULL;
ut_a(buf_page_in_file(bpage));
......@@ -368,18 +368,21 @@ scan_again:
if (buf_page_get_space(bpage) != id) {
/* Skip this block, as it does not belong to
the space that is being invalidated. */
goto next_page;
} else if (buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
/* We cannot remove this page during this scan
yet; maybe the system is currently reading it
in, or flushing the modifications to the file */
all_freed = FALSE;
goto next_page;
} else {
mutex_t* block_mutex = buf_page_get_mutex(bpage);
block_mutex = buf_page_get_mutex(bpage);
mutex_enter(block_mutex);
if (bpage->buf_fix_count > 0) {
mutex_exit(block_mutex);
/* We cannot remove this page during
this scan yet; maybe the system is
currently reading it in, or flushing
......@@ -389,6 +392,9 @@ scan_again:
goto next_page;
}
}
ut_ad(mutex_own(block_mutex));
#ifdef UNIV_DEBUG
if (buf_debug_prints) {
......@@ -400,37 +406,7 @@ scan_again:
#endif
if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) {
/* This is a compressed-only block
descriptor. Ensure that prev_bpage
cannot be relocated when bpage is freed. */
if (UNIV_LIKELY(prev_bpage != NULL)) {
switch (buf_page_get_state(
prev_bpage)) {
case BUF_BLOCK_FILE_PAGE:
/* Descriptors of uncompressed
blocks will not be relocated,
because we are holding the
buf_pool_mutex. */
break;
case BUF_BLOCK_ZIP_PAGE:
case BUF_BLOCK_ZIP_DIRTY:
/* Descriptors of compressed-
only blocks can be relocated,
unless they are buffer-fixed.
Because both bpage and
prev_bpage are protected by
buf_pool_zip_mutex, it is
not necessary to acquire
further mutexes. */
ut_ad(&buf_pool_zip_mutex
== block_mutex);
ut_ad(mutex_own(block_mutex));
prev_bpage_buf_fix = TRUE;
prev_bpage->buf_fix_count++;
break;
default:
ut_error;
}
}
descriptor. Do nothing. */
} else if (((buf_block_t*) bpage)->is_hashed) {
ulint page_no;
ulint zip_size;
......@@ -459,36 +435,16 @@ scan_again:
if (buf_LRU_block_remove_hashed_page(bpage, TRUE)
!= BUF_BLOCK_ZIP_FREE) {
buf_LRU_block_free_hashed_page((buf_block_t*)
bpage);
buf_LRU_block_free_hashed_page((buf_block_t*) bpage);
mutex_exit(block_mutex);
} else {
/* The block_mutex should have been
released by buf_LRU_block_remove_hashed_page()
when it returns BUF_BLOCK_ZIP_FREE. */
/* The block_mutex should have been released
by buf_LRU_block_remove_hashed_page() when it
returns BUF_BLOCK_ZIP_FREE. */
ut_ad(block_mutex == &buf_pool_zip_mutex);
ut_ad(!mutex_own(block_mutex));
if (prev_bpage_buf_fix) {
/* We temporarily buffer-fixed
prev_bpage, so that
buf_buddy_free() could not
relocate it, in case it was a
compressed-only block
descriptor. */
mutex_enter(block_mutex);
ut_ad(prev_bpage->buf_fix_count > 0);
prev_bpage->buf_fix_count--;
mutex_exit(block_mutex);
}
goto next_page_no_mutex;
}
next_page:
mutex_exit(block_mutex);
}
next_page_no_mutex:
bpage = prev_bpage;
}
......@@ -574,7 +530,7 @@ buf_LRU_free_from_unzip_LRU_list(
UNIV_LIKELY(block != NULL) && UNIV_LIKELY(distance > 0);
block = UT_LIST_GET_PREV(unzip_LRU, block), distance--) {
enum buf_lru_free_block_status freed;
ibool freed;
ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
ut_ad(block->in_unzip_LRU_list);
......@@ -584,24 +540,9 @@ buf_LRU_free_from_unzip_LRU_list(
freed = buf_LRU_free_block(&block->page, FALSE);
mutex_exit(&block->mutex);
switch (freed) {
case BUF_LRU_FREED:
if (freed) {
return(TRUE);
case BUF_LRU_CANNOT_RELOCATE:
/* If we failed to relocate, try
regular LRU eviction. */
return(FALSE);
case BUF_LRU_NOT_FREED:
/* The block was buffer-fixed or I/O-fixed.
Keep looking. */
continue;
}
/* inappropriate return value from
buf_LRU_free_block() */
ut_error;
}
return(FALSE);
......@@ -632,10 +573,9 @@ buf_LRU_free_from_common_LRU_list(
UNIV_LIKELY(bpage != NULL) && UNIV_LIKELY(distance > 0);
bpage = UT_LIST_GET_PREV(LRU, bpage), distance--) {
enum buf_lru_free_block_status freed;
ibool freed;
unsigned accessed;
mutex_t* block_mutex
= buf_page_get_mutex(bpage);
mutex_t* block_mutex = buf_page_get_mutex(bpage);
ut_ad(buf_page_in_file(bpage));
ut_ad(bpage->in_LRU_list);
......@@ -645,8 +585,7 @@ buf_LRU_free_from_common_LRU_list(
freed = buf_LRU_free_block(bpage, TRUE);
mutex_exit(block_mutex);
switch (freed) {
case BUF_LRU_FREED:
if (freed) {
/* Keep track of pages that are evicted without
ever being accessed. This gives us a measure of
the effectiveness of readahead */
......@@ -654,21 +593,7 @@ buf_LRU_free_from_common_LRU_list(
++buf_pool->stat.n_ra_pages_evicted;
}
return(TRUE);
case BUF_LRU_NOT_FREED:
/* The block was dirty, buffer-fixed, or I/O-fixed.
Keep looking. */
continue;
case BUF_LRU_CANNOT_RELOCATE:
/* This should never occur, because we
want to discard the compressed page too. */
break;
}
/* inappropriate return value from
buf_LRU_free_block() */
ut_error;
}
return(FALSE);
......@@ -1350,17 +1275,16 @@ buf_LRU_make_block_old(
Try to free a block. If bpage is a descriptor of a compressed-only
page, the descriptor object will be freed as well.
NOTE: If this function returns BUF_LRU_FREED, it will temporarily
NOTE: If this function returns TRUE, it will temporarily
release buf_pool_mutex. Furthermore, the page frame will no longer be
accessible via bpage.
The caller must hold buf_pool_mutex and buf_page_get_mutex(bpage) and
release these two mutexes after the call. No other
buf_page_get_mutex() may be held when calling this function.
@return BUF_LRU_FREED if freed, BUF_LRU_CANNOT_RELOCATE or
BUF_LRU_NOT_FREED otherwise. */
@return TRUE if freed, FALSE otherwise. */
UNIV_INTERN
enum buf_lru_free_block_status
ibool
buf_LRU_free_block(
/*===============*/
buf_page_t* bpage, /*!< in: block to be freed */
......@@ -1385,7 +1309,7 @@ buf_LRU_free_block(
if (!buf_page_can_relocate(bpage)) {
/* Do not free buffer-fixed or I/O-fixed blocks. */
return(BUF_LRU_NOT_FREED);
return(FALSE);
}
#ifdef UNIV_IBUF_COUNT_DEBUG
......@@ -1397,7 +1321,7 @@ buf_LRU_free_block(
/* Do not completely free dirty blocks. */
if (bpage->oldest_modification) {
return(BUF_LRU_NOT_FREED);
return(FALSE);
}
} else if (bpage->oldest_modification) {
/* Do not completely free dirty blocks. */
......@@ -1405,7 +1329,7 @@ buf_LRU_free_block(
if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) {
ut_ad(buf_page_get_state(bpage)
== BUF_BLOCK_ZIP_DIRTY);
return(BUF_LRU_NOT_FREED);
return(FALSE);
}
goto alloc;
......@@ -1414,14 +1338,8 @@ buf_LRU_free_block(
If it cannot be allocated (without freeing a block
from the LRU list), refuse to free bpage. */
alloc:
buf_pool_mutex_exit_forbid();
b = buf_buddy_alloc(sizeof *b, NULL);
buf_pool_mutex_exit_allow();
if (UNIV_UNLIKELY(!b)) {
return(BUF_LRU_CANNOT_RELOCATE);
}
b = buf_page_alloc_descriptor();
ut_a(b);
memcpy(b, bpage, sizeof *b);
}
......@@ -1589,7 +1507,7 @@ alloc:
mutex_enter(block_mutex);
}
return(BUF_LRU_FREED);
return(TRUE);
}
/******************************************************************//**
......@@ -1809,10 +1727,8 @@ buf_LRU_block_remove_hashed_page(
buf_pool_mutex_exit_forbid();
buf_buddy_free(bpage->zip.data,
page_zip_get_size(&bpage->zip));
bpage->state = BUF_BLOCK_ZIP_FREE;
buf_buddy_free(bpage, sizeof(*bpage));
buf_pool_mutex_exit_allow();
UNIV_MEM_UNDESC(bpage);
buf_page_free_descriptor(bpage);
return(BUF_BLOCK_ZIP_FREE);
case BUF_BLOCK_FILE_PAGE:
......
/*****************************************************************************
Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved.
Copyright (c) 2006, 2011, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
......@@ -37,24 +37,19 @@ Created December 2006 by Marko Makela
/**********************************************************************//**
Allocate a block. The thread calling this function must hold
buf_pool_mutex and must not hold buf_pool_zip_mutex or any
block->mutex. The buf_pool_mutex may only be released and reacquired
if lru != NULL. This function should only be used for allocating
compressed page frames or control blocks (buf_page_t). Allocated
control blocks must be properly initialized immediately after
buf_buddy_alloc() has returned the memory, before releasing
buf_pool_mutex.
@return allocated block, possibly NULL if lru == NULL */
block->mutex. The buf_pool_mutex may be released and reacquired.
This function should only be used for allocating compressed page frames.
@return allocated block, never NULL */
UNIV_INLINE
void*
buf_buddy_alloc(
/*============*/
ulint size, /*!< in: block size, up to UNIV_PAGE_SIZE */
ulint size, /*!< in: compressed page size
(between PAGE_ZIP_MIN_SIZE and UNIV_PAGE_SIZE) */
ibool* lru) /*!< in: pointer to a variable that will be assigned
TRUE if storage was allocated from the LRU list
and buf_pool_mutex was temporarily released,
or NULL if the LRU list should not be used */
__attribute__((malloc));
and buf_pool_mutex was temporarily released */
__attribute__((malloc, nonnull));
/**********************************************************************//**
Release a block. */
UNIV_INLINE
......
/*****************************************************************************
Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved.
Copyright (c) 2006, 2011, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
......@@ -36,8 +36,8 @@ Created December 2006 by Marko Makela
/**********************************************************************//**
Allocate a block. The thread calling this function must hold
buf_pool_mutex and must not hold buf_pool_zip_mutex or any block->mutex.
The buf_pool_mutex may only be released and reacquired if lru != NULL.
@return allocated block, possibly NULL if lru==NULL */
The buf_pool_mutex may be released and reacquired.
@return allocated block, never NULL */
UNIV_INTERN
void*
buf_buddy_alloc_low(
......@@ -46,9 +46,8 @@ buf_buddy_alloc_low(
or BUF_BUDDY_SIZES */
ibool* lru) /*!< in: pointer to a variable that will be assigned
TRUE if storage was allocated from the LRU list
and buf_pool_mutex was temporarily released,
or NULL if the LRU list should not be used */
__attribute__((malloc));
and buf_pool_mutex was temporarily released */
__attribute__((malloc, nonnull));
/**********************************************************************//**
Deallocate a block. */
......@@ -74,6 +73,8 @@ buf_buddy_get_slot(
ulint i;
ulint s;
ut_ad(size >= PAGE_ZIP_MIN_SIZE);
for (i = 0, s = BUF_BUDDY_LOW; s < size; i++, s <<= 1) {
}
......@@ -84,26 +85,25 @@ buf_buddy_get_slot(
/**********************************************************************//**
Allocate a block. The thread calling this function must hold
buf_pool_mutex and must not hold buf_pool_zip_mutex or any
block->mutex. The buf_pool_mutex may only be released and reacquired
if lru != NULL. This function should only be used for allocating
compressed page frames or control blocks (buf_page_t). Allocated
control blocks must be properly initialized immediately after
buf_buddy_alloc() has returned the memory, before releasing
buf_pool_mutex.
@return allocated block, possibly NULL if lru == NULL */
block->mutex. The buf_pool_mutex may be released and reacquired.
This function should only be used for allocating compressed page frames.
@return allocated block, never NULL */
UNIV_INLINE
void*
buf_buddy_alloc(
/*============*/
ulint size, /*!< in: block size, up to UNIV_PAGE_SIZE */
ulint size, /*!< in: compressed page size
(between PAGE_ZIP_MIN_SIZE and UNIV_PAGE_SIZE) */
ibool* lru) /*!< in: pointer to a variable that will be assigned
TRUE if storage was allocated from the LRU list
and buf_pool_mutex was temporarily released,
or NULL if the LRU list should not be used */
and buf_pool_mutex was temporarily released */
{
ut_ad(buf_pool_mutex_own());
ut_ad(ut_is_2pow(size));
ut_ad(size >= PAGE_ZIP_MIN_SIZE);
ut_ad(size <= UNIV_PAGE_SIZE);
return(buf_buddy_alloc_low(buf_buddy_get_slot(size), lru));
return((byte*) buf_buddy_alloc_low(buf_buddy_get_slot(size), lru));
}
/**********************************************************************//**
......@@ -117,6 +117,9 @@ buf_buddy_free(
ulint size) /*!< in: block size, up to UNIV_PAGE_SIZE */
{
ut_ad(buf_pool_mutex_own());
ut_ad(ut_is_2pow(size));
ut_ad(size >= PAGE_ZIP_MIN_SIZE);
ut_ad(size <= UNIV_PAGE_SIZE);
buf_buddy_free_low(buf, buf_buddy_get_slot(size));
}
......
......@@ -156,6 +156,23 @@ UNIV_INLINE
ib_uint64_t
buf_pool_get_oldest_modification(void);
/*==================================*/
/********************************************************************//**
Allocates a buf_page_t descriptor. This function must succeed. In case
of failure we assert in this function. */
UNIV_INLINE
buf_page_t*
buf_page_alloc_descriptor(void)
/*===========================*/
__attribute__((malloc));
/********************************************************************//**
Free a buf_page_t descriptor. */
UNIV_INLINE
void
buf_page_free_descriptor(
/*=====================*/
buf_page_t* bpage) /*!< in: bpage descriptor to free. */
__attribute__((nonnull));
/********************************************************************//**
Allocates a buffer block.
@return own: the allocated block, in state BUF_BLOCK_MEMORY */
......
......@@ -714,6 +714,35 @@ buf_block_get_lock_hash_val(
return(block->lock_hash_val);
}
/********************************************************************//**
Allocates a buf_page_t descriptor. This function must succeed. In case
of failure we assert in this function.
@return: the allocated descriptor. */
UNIV_INLINE
buf_page_t*
buf_page_alloc_descriptor(void)
/*===========================*/
{
buf_page_t* bpage;
bpage = (buf_page_t*) ut_malloc(sizeof *bpage);
ut_d(memset(bpage, 0, sizeof *bpage));
UNIV_MEM_ALLOC(bpage, sizeof *bpage);
return(bpage);
}
/********************************************************************//**
Free a buf_page_t descriptor. */
UNIV_INLINE
void
buf_page_free_descriptor(
/*=====================*/
buf_page_t* bpage) /*!< in: bpage descriptor to free. */
{
ut_free(bpage);
}
/********************************************************************//**
Allocates a buffer block.
@return own: the allocated block, in state BUF_BLOCK_MEMORY */
......
......@@ -30,18 +30,6 @@ Created 11/5/1995 Heikki Tuuri
#include "ut0byte.h"
#include "buf0types.h"
/** The return type of buf_LRU_free_block() */
enum buf_lru_free_block_status {
/** freed */
BUF_LRU_FREED = 0,
/** not freed because the caller asked to remove the
uncompressed frame but the control block cannot be
relocated */
BUF_LRU_CANNOT_RELOCATE,
/** not freed because of some other reason */
BUF_LRU_NOT_FREED
};
/******************************************************************//**
Tries to remove LRU flushed blocks from the end of the LRU list and put them
to the free list. This is beneficial for the efficiency of the insert buffer
......@@ -98,17 +86,16 @@ buf_LRU_insert_zip_clean(
Try to free a block. If bpage is a descriptor of a compressed-only
page, the descriptor object will be freed as well.
NOTE: If this function returns BUF_LRU_FREED, it will temporarily
NOTE: If this function returns TRUE, it will temporarily
release buf_pool_mutex. Furthermore, the page frame will no longer be
accessible via bpage.
The caller must hold buf_pool_mutex and buf_page_get_mutex(bpage) and
release these two mutexes after the call. No other
buf_page_get_mutex() may be held when calling this function.
@return BUF_LRU_FREED if freed, BUF_LRU_CANNOT_RELOCATE or
BUF_LRU_NOT_FREED otherwise. */
@return TRUE if freed, FALSE otherwise. */
UNIV_INTERN
enum buf_lru_free_block_status
ibool
buf_LRU_free_block(
/*===============*/
buf_page_t* bpage, /*!< in: block to be freed */
......
/*****************************************************************************
Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
......@@ -26,6 +26,8 @@ Created 11/17/1995 Heikki Tuuri
#ifndef buf0types_h
#define buf0types_h
#include "page0types.h"
/** Buffer page (uncompressed or compressed) */
typedef struct buf_page_struct buf_page_t;
/** Buffer block for which an uncompressed page exists */
......@@ -58,17 +60,10 @@ enum buf_io_fix {
/** Parameters of binary buddy system for compressed pages (buf0buddy.h) */
/* @{ */
#if UNIV_WORD_SIZE <= 4 /* 32-bit system */
/** Base-2 logarithm of the smallest buddy block size */
# define BUF_BUDDY_LOW_SHIFT 6
#else /* 64-bit system */
/** Base-2 logarithm of the smallest buddy block size */
# define BUF_BUDDY_LOW_SHIFT 7
#endif
#define BUF_BUDDY_LOW_SHIFT PAGE_ZIP_MIN_SIZE_SHIFT
#define BUF_BUDDY_LOW (1 << BUF_BUDDY_LOW_SHIFT)
/*!< minimum block size in the binary
buddy system; must be at least
sizeof(buf_page_t) */
#define BUF_BUDDY_SIZES (UNIV_PAGE_SIZE_SHIFT - BUF_BUDDY_LOW_SHIFT)
/*!< number of buddy sizes */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment