buf0buf.c, buf0buf.ic, buf0buf.h:

  Reduce memory usage of the buffer headers
Many files:
  Merge InnoDB-4.1 with AWE support
parent edb019ae
...@@ -291,6 +291,7 @@ btr_cur_search_to_nth_level( ...@@ -291,6 +291,7 @@ btr_cur_search_to_nth_level(
&& latch_mode <= BTR_MODIFY_LEAF && info->last_hash_succ && latch_mode <= BTR_MODIFY_LEAF && info->last_hash_succ
&& !estimate && !estimate
&& mode != PAGE_CUR_LE_OR_EXTENDS && mode != PAGE_CUR_LE_OR_EXTENDS
&& srv_use_adaptive_hash_indexes
&& btr_search_guess_on_hash(index, info, tuple, mode, && btr_search_guess_on_hash(index, info, tuple, mode,
latch_mode, cursor, latch_mode, cursor,
has_search_latch, mtr)) { has_search_latch, mtr)) {
...@@ -495,9 +496,11 @@ retry_page_get: ...@@ -495,9 +496,11 @@ retry_page_get:
cursor->up_bytes = up_bytes; cursor->up_bytes = up_bytes;
#ifdef BTR_CUR_ADAPT #ifdef BTR_CUR_ADAPT
btr_search_info_update(index, cursor); if (srv_use_adaptive_hash_indexes) {
#endif
btr_search_info_update(index, cursor);
}
#endif
ut_ad(cursor->up_match != ULINT_UNDEFINED ut_ad(cursor->up_match != ULINT_UNDEFINED
|| mode != PAGE_CUR_GE); || mode != PAGE_CUR_GE);
ut_ad(cursor->up_match != ULINT_UNDEFINED ut_ad(cursor->up_match != ULINT_UNDEFINED
......
...@@ -95,7 +95,9 @@ btr_pcur_store_position( ...@@ -95,7 +95,9 @@ btr_pcur_store_position(
ut_a(cursor->latch_mode != BTR_NO_LATCHES); ut_a(cursor->latch_mode != BTR_NO_LATCHES);
if (page_get_n_recs(page) == 0) { if (page_get_n_recs(page) == 0) {
/* It must be an empty index tree */ /* It must be an empty index tree; NOTE that in this case
we do not store the modify_clock, but always do a search
if we restore the cursor position */
ut_a(btr_page_get_next(page, mtr) == FIL_NULL ut_a(btr_page_get_next(page, mtr) == FIL_NULL
&& btr_page_get_prev(page, mtr) == FIL_NULL); && btr_page_get_prev(page, mtr) == FIL_NULL);
...@@ -128,12 +130,13 @@ btr_pcur_store_position( ...@@ -128,12 +130,13 @@ btr_pcur_store_position(
} else { } else {
cursor->rel_pos = BTR_PCUR_ON; cursor->rel_pos = BTR_PCUR_ON;
} }
cursor->old_stored = BTR_PCUR_OLD_STORED; cursor->old_stored = BTR_PCUR_OLD_STORED;
cursor->old_rec = dict_tree_copy_rec_order_prefix(tree, rec, cursor->old_rec = dict_tree_copy_rec_order_prefix(tree, rec,
&(cursor->old_rec_buf), &(cursor->old_rec_buf),
&(cursor->buf_size)); &(cursor->buf_size));
cursor->block_when_stored = buf_block_align(page);
cursor->modify_clock = buf_frame_get_modify_clock(page); cursor->modify_clock = buf_frame_get_modify_clock(page);
} }
...@@ -205,6 +208,9 @@ btr_pcur_restore_position( ...@@ -205,6 +208,9 @@ btr_pcur_restore_position(
if (cursor->rel_pos == BTR_PCUR_AFTER_LAST_IN_TREE if (cursor->rel_pos == BTR_PCUR_AFTER_LAST_IN_TREE
|| cursor->rel_pos == BTR_PCUR_BEFORE_FIRST_IN_TREE) { || cursor->rel_pos == BTR_PCUR_BEFORE_FIRST_IN_TREE) {
/* In these cases we do not try an optimistic restoration,
but always do a search */
if (cursor->rel_pos == BTR_PCUR_BEFORE_FIRST_IN_TREE) { if (cursor->rel_pos == BTR_PCUR_BEFORE_FIRST_IN_TREE) {
from_left = TRUE; from_left = TRUE;
} else { } else {
...@@ -214,6 +220,10 @@ btr_pcur_restore_position( ...@@ -214,6 +220,10 @@ btr_pcur_restore_position(
btr_cur_open_at_index_side(from_left, btr_cur_open_at_index_side(from_left,
btr_pcur_get_btr_cur(cursor)->index, latch_mode, btr_pcur_get_btr_cur(cursor)->index, latch_mode,
btr_pcur_get_btr_cur(cursor), mtr); btr_pcur_get_btr_cur(cursor), mtr);
cursor->block_when_stored =
buf_block_align(btr_pcur_get_page(cursor));
return(FALSE); return(FALSE);
} }
...@@ -224,8 +234,9 @@ btr_pcur_restore_position( ...@@ -224,8 +234,9 @@ btr_pcur_restore_position(
if (latch_mode == BTR_SEARCH_LEAF || latch_mode == BTR_MODIFY_LEAF) { if (latch_mode == BTR_SEARCH_LEAF || latch_mode == BTR_MODIFY_LEAF) {
/* Try optimistic restoration */ /* Try optimistic restoration */
if (buf_page_optimistic_get(latch_mode, page, if (buf_page_optimistic_get(latch_mode,
cursor->modify_clock, mtr)) { cursor->block_when_stored, page,
cursor->modify_clock, mtr)) {
cursor->pos_state = BTR_PCUR_IS_POSITIONED; cursor->pos_state = BTR_PCUR_IS_POSITIONED;
buf_page_dbg_add_level(page, SYNC_TREE_NODE); buf_page_dbg_add_level(page, SYNC_TREE_NODE);
...@@ -270,8 +281,6 @@ btr_pcur_restore_position( ...@@ -270,8 +281,6 @@ btr_pcur_restore_position(
btr_pcur_open_with_no_init(btr_pcur_get_btr_cur(cursor)->index, tuple, btr_pcur_open_with_no_init(btr_pcur_get_btr_cur(cursor)->index, tuple,
mode, latch_mode, cursor, 0, mtr); mode, latch_mode, cursor, 0, mtr);
cursor->old_stored = BTR_PCUR_OLD_STORED;
/* Restore the old search mode */ /* Restore the old search mode */
cursor->search_mode = old_mode; cursor->search_mode = old_mode;
...@@ -280,11 +289,18 @@ btr_pcur_restore_position( ...@@ -280,11 +289,18 @@ btr_pcur_restore_position(
&& btr_pcur_is_on_user_rec(cursor, mtr) && btr_pcur_is_on_user_rec(cursor, mtr)
&& 0 == cmp_dtuple_rec(tuple, btr_pcur_get_rec(cursor))) { && 0 == cmp_dtuple_rec(tuple, btr_pcur_get_rec(cursor))) {
/* We have to store the NEW value for the modify clock, since /* We have to store the NEW value for the modify clock, since
the cursor can now be on a different page! */ the cursor can now be on a different page! But we can retain
the value of old_rec */
cursor->modify_clock =
buf_frame_get_modify_clock(btr_pcur_get_page(cursor));
cursor->block_when_stored =
buf_block_align(btr_pcur_get_page(cursor));
cursor->old_stored = BTR_PCUR_OLD_STORED;
cursor->modify_clock = buf_frame_get_modify_clock(
buf_frame_align(btr_pcur_get_rec(cursor)));
mem_heap_free(heap); mem_heap_free(heap);
return(TRUE); return(TRUE);
...@@ -292,6 +308,12 @@ btr_pcur_restore_position( ...@@ -292,6 +308,12 @@ btr_pcur_restore_position(
mem_heap_free(heap); mem_heap_free(heap);
/* We have to store new position information, modify_clock etc.,
to the cursor because it can now be on a different page, the record
under it may have been removed, etc. */
btr_pcur_store_position(cursor, mtr);
return(FALSE); return(FALSE);
} }
......
This diff is collapsed.
...@@ -24,6 +24,7 @@ Created 11/11/1995 Heikki Tuuri ...@@ -24,6 +24,7 @@ Created 11/11/1995 Heikki Tuuri
#include "log0log.h" #include "log0log.h"
#include "os0file.h" #include "os0file.h"
#include "trx0sys.h" #include "trx0sys.h"
#include "srv0srv.h"
/* When flushed, dirty blocks are searched in neighborhoods of this size, and /* When flushed, dirty blocks are searched in neighborhoods of this size, and
flushed along with the original page. */ flushed along with the original page. */
...@@ -103,7 +104,7 @@ buf_flush_ready_for_replace( ...@@ -103,7 +104,7 @@ buf_flush_ready_for_replace(
/*========================*/ /*========================*/
/* out: TRUE if can replace immediately */ /* out: TRUE if can replace immediately */
buf_block_t* block) /* in: buffer control block, must be in state buf_block_t* block) /* in: buffer control block, must be in state
BUF_BLOCK_FILE_PAGE and in the LRU list*/ BUF_BLOCK_FILE_PAGE and in the LRU list */
{ {
ut_ad(mutex_own(&(buf_pool->mutex))); ut_ad(mutex_own(&(buf_pool->mutex)));
ut_ad(block->state == BUF_BLOCK_FILE_PAGE); ut_ad(block->state == BUF_BLOCK_FILE_PAGE);
...@@ -134,7 +135,6 @@ buf_flush_ready_for_flush( ...@@ -134,7 +135,6 @@ buf_flush_ready_for_flush(
if ((ut_dulint_cmp(block->oldest_modification, ut_dulint_zero) > 0) if ((ut_dulint_cmp(block->oldest_modification, ut_dulint_zero) > 0)
&& (block->io_fix == 0)) { && (block->io_fix == 0)) {
if (flush_type != BUF_FLUSH_LRU) { if (flush_type != BUF_FLUSH_LRU) {
return(TRUE); return(TRUE);
...@@ -436,6 +436,20 @@ buf_flush_try_page( ...@@ -436,6 +436,20 @@ buf_flush_try_page(
&& block && buf_flush_ready_for_flush(block, flush_type)) { && block && buf_flush_ready_for_flush(block, flush_type)) {
block->io_fix = BUF_IO_WRITE; block->io_fix = BUF_IO_WRITE;
/* If AWE is enabled and the page is not mapped to a frame,
then map it */
if (block->frame == NULL) {
ut_a(srv_use_awe);
/* We set second parameter TRUE because the block is
in the LRU list and we must put it to
awe_LRU_free_mapped list once mapped to a frame */
buf_awe_map_page_to_frame(block, TRUE);
}
block->flush_type = flush_type; block->flush_type = flush_type;
if (buf_pool->n_flush[flush_type] == 0) { if (buf_pool->n_flush[flush_type] == 0) {
...@@ -486,6 +500,20 @@ buf_flush_try_page( ...@@ -486,6 +500,20 @@ buf_flush_try_page(
..._ready_for_flush). */ ..._ready_for_flush). */
block->io_fix = BUF_IO_WRITE; block->io_fix = BUF_IO_WRITE;
/* If AWE is enabled and the page is not mapped to a frame,
then map it */
if (block->frame == NULL) {
ut_a(srv_use_awe);
/* We set second parameter TRUE because the block is
in the LRU list and we must put it to
awe_LRU_free_mapped list once mapped to a frame */
buf_awe_map_page_to_frame(block, TRUE);
}
block->flush_type = flush_type; block->flush_type = flush_type;
if (buf_pool->n_flush[flush_type] == 0) { if (buf_pool->n_flush[flush_type] == 0) {
...@@ -511,6 +539,20 @@ buf_flush_try_page( ...@@ -511,6 +539,20 @@ buf_flush_try_page(
&& buf_flush_ready_for_flush(block, flush_type)) { && buf_flush_ready_for_flush(block, flush_type)) {
block->io_fix = BUF_IO_WRITE; block->io_fix = BUF_IO_WRITE;
/* If AWE is enabled and the page is not mapped to a frame,
then map it */
if (block->frame == NULL) {
ut_a(srv_use_awe);
/* We set second parameter TRUE because the block is
in the LRU list and we must put it to
awe_LRU_free_mapped list once mapped to a frame */
buf_awe_map_page_to_frame(block, TRUE);
}
block->flush_type = flush_type; block->flush_type = flush_type;
if (buf_pool->n_flush[block->flush_type] == 0) { if (buf_pool->n_flush[block->flush_type] == 0) {
......
...@@ -132,7 +132,13 @@ buf_LRU_search_and_free_block( ...@@ -132,7 +132,13 @@ buf_LRU_search_and_free_block(
mutex_exit(&(buf_pool->mutex)); mutex_exit(&(buf_pool->mutex));
btr_search_drop_page_hash_index(block->frame); /* Remove possible adaptive hash index built on the
page; in the case of AWE the block may not have a
frame at all */
if (block->frame) {
btr_search_drop_page_hash_index(block->frame);
}
mutex_enter(&(buf_pool->mutex)); mutex_enter(&(buf_pool->mutex));
...@@ -196,7 +202,9 @@ list. */ ...@@ -196,7 +202,9 @@ list. */
buf_block_t* buf_block_t*
buf_LRU_get_free_block(void) buf_LRU_get_free_block(void)
/*========================*/ /*========================*/
/* out: the free control block */ /* out: the free control block; also if AWE is
used, it is guaranteed that the block has its
page mapped to a frame when we return */
{ {
buf_block_t* block = NULL; buf_block_t* block = NULL;
ibool freed; ibool freed;
...@@ -257,6 +265,22 @@ loop: ...@@ -257,6 +265,22 @@ loop:
block = UT_LIST_GET_FIRST(buf_pool->free); block = UT_LIST_GET_FIRST(buf_pool->free);
UT_LIST_REMOVE(free, buf_pool->free, block); UT_LIST_REMOVE(free, buf_pool->free, block);
if (srv_use_awe) {
if (block->frame) {
/* Remove from the list of mapped pages */
UT_LIST_REMOVE(awe_LRU_free_mapped,
buf_pool->awe_LRU_free_mapped, block);
} else {
/* We map the page to a frame; second param
FALSE below because we do not want it to be
added to the awe_LRU_free_mapped list */
buf_awe_map_page_to_frame(block, FALSE);
}
}
block->state = BUF_BLOCK_READY_FOR_USE; block->state = BUF_BLOCK_READY_FOR_USE;
mutex_exit(&(buf_pool->mutex)); mutex_exit(&(buf_pool->mutex));
...@@ -429,6 +453,13 @@ buf_LRU_remove_block( ...@@ -429,6 +453,13 @@ buf_LRU_remove_block(
/* Remove the block from the LRU list */ /* Remove the block from the LRU list */
UT_LIST_REMOVE(LRU, buf_pool->LRU, block); UT_LIST_REMOVE(LRU, buf_pool->LRU, block);
if (srv_use_awe && block->frame) {
/* Remove from the list of mapped pages */
UT_LIST_REMOVE(awe_LRU_free_mapped,
buf_pool->awe_LRU_free_mapped, block);
}
/* If the LRU list is so short that LRU_old not defined, return */ /* If the LRU list is so short that LRU_old not defined, return */
if (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN) { if (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN) {
...@@ -475,6 +506,13 @@ buf_LRU_add_block_to_end_low( ...@@ -475,6 +506,13 @@ buf_LRU_add_block_to_end_low(
UT_LIST_ADD_LAST(LRU, buf_pool->LRU, block); UT_LIST_ADD_LAST(LRU, buf_pool->LRU, block);
if (srv_use_awe && block->frame) {
/* Add to the list of mapped pages */
UT_LIST_ADD_LAST(awe_LRU_free_mapped,
buf_pool->awe_LRU_free_mapped, block);
}
if (UT_LIST_GET_LEN(buf_pool->LRU) >= BUF_LRU_OLD_MIN_LEN) { if (UT_LIST_GET_LEN(buf_pool->LRU) >= BUF_LRU_OLD_MIN_LEN) {
buf_pool->LRU_old_len++; buf_pool->LRU_old_len++;
...@@ -518,6 +556,15 @@ buf_LRU_add_block_low( ...@@ -518,6 +556,15 @@ buf_LRU_add_block_low(
block->old = old; block->old = old;
cl = buf_pool_clock_tic(); cl = buf_pool_clock_tic();
if (srv_use_awe && block->frame) {
/* Add to the list of mapped pages; for simplicity we always
add to the start, even if the user would have set 'old'
TRUE */
UT_LIST_ADD_FIRST(awe_LRU_free_mapped,
buf_pool->awe_LRU_free_mapped, block);
}
if (!old || (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN)) { if (!old || (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN)) {
UT_LIST_ADD_FIRST(LRU, buf_pool->LRU, block); UT_LIST_ADD_FIRST(LRU, buf_pool->LRU, block);
...@@ -613,6 +660,13 @@ buf_LRU_block_free_non_file_page( ...@@ -613,6 +660,13 @@ buf_LRU_block_free_non_file_page(
memset(block->frame, '\0', UNIV_PAGE_SIZE); memset(block->frame, '\0', UNIV_PAGE_SIZE);
#endif #endif
UT_LIST_ADD_FIRST(free, buf_pool->free, block); UT_LIST_ADD_FIRST(free, buf_pool->free, block);
if (srv_use_awe && block->frame) {
/* Add to the list of mapped pages */
UT_LIST_ADD_FIRST(awe_LRU_free_mapped,
buf_pool->awe_LRU_free_mapped, block);
}
} }
/********************************************************************** /**********************************************************************
...@@ -639,7 +693,9 @@ buf_LRU_block_remove_hashed_page( ...@@ -639,7 +693,9 @@ buf_LRU_block_remove_hashed_page(
buf_pool->freed_page_clock += 1; buf_pool->freed_page_clock += 1;
buf_frame_modify_clock_inc(block->frame); /* Note that if AWE is enabled the block may not have a frame at all */
buf_block_modify_clock_inc(block);
HASH_DELETE(buf_block_t, hash, buf_pool->page_hash, HASH_DELETE(buf_block_t, hash, buf_pool->page_hash,
buf_page_address_fold(block->space, block->offset), buf_page_address_fold(block->space, block->offset),
......
...@@ -576,7 +576,7 @@ buf_read_recv_pages( ...@@ -576,7 +576,7 @@ buf_read_recv_pages(
os_aio_print_debug = FALSE; os_aio_print_debug = FALSE;
while (buf_pool->n_pend_reads >= RECV_POOL_N_FREE_BLOCKS / 2) { while (buf_pool->n_pend_reads >= recv_n_pool_free_frames / 2) {
os_aio_simulated_wake_handler_threads(); os_aio_simulated_wake_handler_threads();
os_thread_sleep(500000); os_thread_sleep(500000);
......
...@@ -466,6 +466,9 @@ struct btr_pcur_struct{ ...@@ -466,6 +466,9 @@ struct btr_pcur_struct{
BTR_PCUR_AFTER, depending on whether BTR_PCUR_AFTER, depending on whether
cursor was on, before, or after the cursor was on, before, or after the
old_rec record */ old_rec record */
buf_block_t* block_when_stored;/* buffer block when the position was
stored; note that if AWE is on, frames
may move */
dulint modify_clock; /* the modify clock value of the dulint modify_clock; /* the modify clock value of the
buffer block when the cursor position buffer block when the cursor position
was stored */ was stored */
......
...@@ -30,6 +30,7 @@ Created 11/5/1995 Heikki Tuuri ...@@ -30,6 +30,7 @@ Created 11/5/1995 Heikki Tuuri
#include "sync0rw.h" #include "sync0rw.h"
#include "hash0hash.h" #include "hash0hash.h"
#include "ut0byte.h" #include "ut0byte.h"
#include "os0proc.h"
/* Flags for flush types */ /* Flags for flush types */
#define BUF_FLUSH_LRU 1 #define BUF_FLUSH_LRU 1
...@@ -58,23 +59,34 @@ extern ibool buf_debug_prints;/* If this is set TRUE, the program ...@@ -58,23 +59,34 @@ extern ibool buf_debug_prints;/* If this is set TRUE, the program
occurs */ occurs */
/************************************************************************ /************************************************************************
Initializes the buffer pool of the database. */ Creates the buffer pool. */
void buf_pool_t*
buf_pool_init( buf_pool_init(
/*==========*/ /*==========*/
ulint max_size, /* in: maximum size of the pool in blocks */ /* out, own: buf_pool object, NULL if not
ulint curr_size); /* in: current size to use, must be <= enough memory or error */
ulint max_size, /* in: maximum size of the buf_pool in
blocks */
ulint curr_size, /* in: current size to use, must be <=
max_size, currently must be equal to
max_size */ max_size */
ulint n_frames); /* in: number of frames; if AWE is used,
this is the size of the address space window
where physical memory pages are mapped; if
AWE is not used then this must be the same
as max_size */
/************************************************************************* /*************************************************************************
Gets the current size of buffer pool in bytes. */ Gets the current size of buffer buf_pool in bytes. In the case of AWE, the
size of AWE window (= the frames). */
UNIV_INLINE UNIV_INLINE
ulint ulint
buf_pool_get_curr_size(void); buf_pool_get_curr_size(void);
/*========================*/ /*========================*/
/* out: size in bytes */ /* out: size in bytes */
/************************************************************************* /*************************************************************************
Gets the maximum size of buffer pool in bytes. */ Gets the maximum size of buffer pool in bytes. In the case of AWE, the
size of AWE window (= the frames). */
UNIV_INLINE UNIV_INLINE
ulint ulint
buf_pool_get_max_size(void); buf_pool_get_max_size(void);
...@@ -138,8 +150,8 @@ improve debugging. Only values RW_S_LATCH and RW_X_LATCH are allowed as LA! */ ...@@ -138,8 +150,8 @@ improve debugging. Only values RW_S_LATCH and RW_X_LATCH are allowed as LA! */
NOTE! The following macros should be used instead of NOTE! The following macros should be used instead of
buf_page_optimistic_get_func, to improve debugging. Only values RW_S_LATCH and buf_page_optimistic_get_func, to improve debugging. Only values RW_S_LATCH and
RW_X_LATCH are allowed as LA! */ RW_X_LATCH are allowed as LA! */
#define buf_page_optimistic_get(LA, G, MC, MTR) buf_page_optimistic_get_func(\ #define buf_page_optimistic_get(LA, BL, G, MC, MTR) buf_page_optimistic_get_func(\
LA, G, MC, IB__FILE__, __LINE__, MTR) LA, BL, G, MC, IB__FILE__, __LINE__, MTR)
/************************************************************************ /************************************************************************
This is the general function used to get optimistic access to a database This is the general function used to get optimistic access to a database
page. */ page. */
...@@ -149,7 +161,9 @@ buf_page_optimistic_get_func( ...@@ -149,7 +161,9 @@ buf_page_optimistic_get_func(
/*=========================*/ /*=========================*/
/* out: TRUE if success */ /* out: TRUE if success */
ulint rw_latch,/* in: RW_S_LATCH, RW_X_LATCH */ ulint rw_latch,/* in: RW_S_LATCH, RW_X_LATCH */
buf_frame_t* guess, /* in: guessed frame */ buf_block_t* block, /* in: guessed block */
buf_frame_t* guess, /* in: guessed frame; note that AWE may move
frames */
dulint modify_clock,/* in: modify clock value if mode is dulint modify_clock,/* in: modify clock value if mode is
..._GUESS_ON_CLOCK */ ..._GUESS_ON_CLOCK */
char* file, /* in: file name */ char* file, /* in: file name */
...@@ -350,6 +364,16 @@ buf_frame_modify_clock_inc( ...@@ -350,6 +364,16 @@ buf_frame_modify_clock_inc(
/* out: new value */ /* out: new value */
buf_frame_t* frame); /* in: pointer to a frame */ buf_frame_t* frame); /* in: pointer to a frame */
/************************************************************************ /************************************************************************
Increments the modify clock of a block by 1. The caller must either (1) own
the buf_pool mutex, with the block's bufferfix count being zero, or (2) own an
x-lock on the block. */
UNIV_INLINE
dulint
buf_block_modify_clock_inc(
/*=======================*/
/* out: new value */
buf_block_t* block); /* in: block */
/************************************************************************
Returns the value of the modify clock. The caller must have an s-lock Returns the value of the modify clock. The caller must have an s-lock
or x-lock on the block. */ or x-lock on the block. */
UNIV_INLINE UNIV_INLINE
...@@ -428,7 +452,7 @@ UNIV_INLINE ...@@ -428,7 +452,7 @@ UNIV_INLINE
buf_frame_t* buf_frame_t*
buf_frame_align( buf_frame_align(
/*============*/ /*============*/
/* out: pointer to block */ /* out: pointer to frame */
byte* ptr); /* in: pointer to a frame */ byte* ptr); /* in: pointer to a frame */
/*********************************************************************** /***********************************************************************
Checks if a pointer points to the block array of the buffer pool (blocks, not Checks if a pointer points to the block array of the buffer pool (blocks, not
...@@ -505,6 +529,19 @@ buf_pool_invalidate(void); ...@@ -505,6 +529,19 @@ buf_pool_invalidate(void);
--------------------------- LOWER LEVEL ROUTINES ------------------------- --------------------------- LOWER LEVEL ROUTINES -------------------------
=========================================================================*/ =========================================================================*/
/************************************************************************
Maps the page of block to a frame, if not mapped yet. Unmaps some page
from the end of the awe_LRU_free_mapped list. */
void
buf_awe_map_page_to_frame(
/*======================*/
buf_block_t* block, /* in: block whose page should be
mapped to a frame */
ibool add_to_mapped_list);/* in: TRUE if, in the case
we need to map the page, we should also
add the block to the
awe_LRU_free_mapped list */
/************************************************************************* /*************************************************************************
Adds latch level info for the rw-lock protecting the buffer frame. This Adds latch level info for the rw-lock protecting the buffer frame. This
should be called in the debug version after a successful latching of a should be called in the debug version after a successful latching of a
...@@ -638,7 +675,16 @@ struct buf_block_struct{ ...@@ -638,7 +675,16 @@ struct buf_block_struct{
byte* frame; /* pointer to buffer frame which byte* frame; /* pointer to buffer frame which
is of size UNIV_PAGE_SIZE, and is of size UNIV_PAGE_SIZE, and
aligned to an address divisible by aligned to an address divisible by
UNIV_PAGE_SIZE */ UNIV_PAGE_SIZE; if AWE is used, this
will be NULL for the pages which are
currently not mapped into the virtual
address space window of the buffer
pool */
os_awe_t* awe_info; /* if AWE is used, then an array of
awe page infos for
UNIV_PAGE_SIZE / OS_AWE_X86_PAGE_SIZE
(normally = 4) physical memory
pages; otherwise NULL */
ulint space; /* space id of the page */ ulint space; /* space id of the page */
ulint offset; /* page number within the space */ ulint offset; /* page number within the space */
ulint lock_hash_val; /* hashed value of the page address ulint lock_hash_val; /* hashed value of the page address
...@@ -691,6 +737,10 @@ struct buf_block_struct{ ...@@ -691,6 +737,10 @@ struct buf_block_struct{
/* node of the free block list */ /* node of the free block list */
UT_LIST_NODE_T(buf_block_t) LRU; UT_LIST_NODE_T(buf_block_t) LRU;
/* node of the LRU list */ /* node of the LRU list */
UT_LIST_NODE_T(buf_block_t) awe_LRU_free_mapped;
/* in the AWE version node in the
list of free and LRU blocks which are
mapped to a frame */
ulint LRU_position; /* value which monotonically ulint LRU_position; /* value which monotonically
decreases (or may stay constant if decreases (or may stay constant if
the block is in the old blocks) toward the block is in the old blocks) toward
...@@ -758,11 +808,12 @@ struct buf_block_struct{ ...@@ -758,11 +808,12 @@ struct buf_block_struct{
BTR_SEARCH_RIGHT_SIDE in hash BTR_SEARCH_RIGHT_SIDE in hash
indexing */ indexing */
/* 6. Debug fields */ /* 6. Debug fields */
#ifdef UNIV_SYNC_DEBUG
rw_lock_t debug_latch; /* in the debug version, each thread rw_lock_t debug_latch; /* in the debug version, each thread
which bufferfixes the block acquires which bufferfixes the block acquires
an s-latch here; so we can use the an s-latch here; so we can use the
debug utilities in sync0rw */ debug utilities in sync0rw */
#endif
ibool file_page_was_freed; ibool file_page_was_freed;
/* this is set to TRUE when fsp /* this is set to TRUE when fsp
frees a page in buffer pool */ frees a page in buffer pool */
...@@ -781,16 +832,36 @@ struct buf_pool_struct{ ...@@ -781,16 +832,36 @@ struct buf_pool_struct{
struct and control blocks, except the struct and control blocks, except the
read-write lock in them */ read-write lock in them */
byte* frame_mem; /* pointer to the memory area which byte* frame_mem; /* pointer to the memory area which
was allocated for the frames */ was allocated for the frames; in AWE
this is the virtual address space
window where we map pages stored
in physical memory */
byte* frame_zero; /* pointer to the first buffer frame: byte* frame_zero; /* pointer to the first buffer frame:
this may differ from frame_mem, because this may differ from frame_mem, because
this is aligned by the frame size */ this is aligned by the frame size */
byte* high_end; /* pointer to the end of the byte* high_end; /* pointer to the end of the buffer
buffer pool */ frames */
ulint n_frames; /* number of frames */
buf_block_t* blocks; /* array of buffer control blocks */ buf_block_t* blocks; /* array of buffer control blocks */
buf_block_t** blocks_of_frames;/* inverse mapping which can be used
to retrieve the buffer control block
of a frame; this is an array which
lists the blocks of frames in the
order frame_zero,
frame_zero + UNIV_PAGE_SIZE, ...
a control block is always assigned
for each frame, even if the frame does
not contain any data; note that in AWE
there are more control blocks than
buffer frames */
os_awe_t* awe_info; /* if AWE is used, AWE info for the
physical 4 kB memory pages associated
with buffer frames */
ulint max_size; /* number of control blocks == ulint max_size; /* number of control blocks ==
maximum pool size in pages */ maximum pool size in pages */
ulint curr_size; /* current pool size in pages */ ulint curr_size; /* current pool size in pages;
currently always the same as
max_size */
hash_table_t* page_hash; /* hash table of the file pages */ hash_table_t* page_hash; /* hash table of the file pages */
ulint n_pend_reads; /* number of pending read operations */ ulint n_pend_reads; /* number of pending read operations */
...@@ -802,11 +873,14 @@ struct buf_pool_struct{ ...@@ -802,11 +873,14 @@ struct buf_pool_struct{
ulint n_pages_created;/* number of pages created in the pool ulint n_pages_created;/* number of pages created in the pool
with no read */ with no read */
ulint n_page_gets; /* number of page gets performed; ulint n_page_gets; /* number of page gets performed;
also successful seraches through also successful searches through
the adaptive hash index are the adaptive hash index are
counted as page gets; this field counted as page gets; this field
is NOT protected by the buffer is NOT protected by the buffer
pool mutex */ pool mutex */
ulint n_pages_awe_remapped; /* if AWE is enabled, the
number of remaps of blocks to
buffer frames */
ulint n_page_gets_old;/* n_page_gets when buf_print was ulint n_page_gets_old;/* n_page_gets when buf_print was
last time called: used to calculate last time called: used to calculate
hit rate */ hit rate */
...@@ -815,6 +889,7 @@ struct buf_pool_struct{ ...@@ -815,6 +889,7 @@ struct buf_pool_struct{
ulint n_pages_written_old;/* number write operations */ ulint n_pages_written_old;/* number write operations */
ulint n_pages_created_old;/* number of pages created in ulint n_pages_created_old;/* number of pages created in
the pool with no read */ the pool with no read */
ulint n_pages_awe_remapped_old;
/* 2. Page flushing algorithm fields */ /* 2. Page flushing algorithm fields */
UT_LIST_BASE_NODE_T(buf_block_t) flush_list; UT_LIST_BASE_NODE_T(buf_block_t) flush_list;
...@@ -847,7 +922,10 @@ struct buf_pool_struct{ ...@@ -847,7 +922,10 @@ struct buf_pool_struct{
/* 3. LRU replacement algorithm fields */ /* 3. LRU replacement algorithm fields */
UT_LIST_BASE_NODE_T(buf_block_t) free; UT_LIST_BASE_NODE_T(buf_block_t) free;
/* base node of the free block list */ /* base node of the free block list;
in the case of AWE, at the start are
always free blocks for which the
physical memory is mapped to a frame */
UT_LIST_BASE_NODE_T(buf_block_t) LRU; UT_LIST_BASE_NODE_T(buf_block_t) LRU;
/* base node of the LRU list */ /* base node of the LRU list */
buf_block_t* LRU_old; /* pointer to the about 3/8 oldest buf_block_t* LRU_old; /* pointer to the about 3/8 oldest
...@@ -859,6 +937,12 @@ struct buf_pool_struct{ ...@@ -859,6 +937,12 @@ struct buf_pool_struct{
see buf0lru.c for the restrictions see buf0lru.c for the restrictions
on this value; not defined if on this value; not defined if
LRU_old == NULL */ LRU_old == NULL */
UT_LIST_BASE_NODE_T(buf_block_t) awe_LRU_free_mapped;
/* list of those blocks which are
in the LRU list or the free list, and
where the page is mapped to a frame;
thus, frames allocated, e.g., to the
lock table, are not in this list */
}; };
/* States of a control block */ /* States of a control block */
......
...@@ -36,25 +36,27 @@ buf_block_peek_if_too_old( ...@@ -36,25 +36,27 @@ buf_block_peek_if_too_old(
} }
/************************************************************************* /*************************************************************************
Gets the current size of buffer buf_pool in bytes. */ Gets the current size of buffer buf_pool in bytes. In the case of AWE, the
size of AWE window (= the frames). */
UNIV_INLINE UNIV_INLINE
ulint ulint
buf_pool_get_curr_size(void) buf_pool_get_curr_size(void)
/*========================*/ /*========================*/
/* out: size in bytes */ /* out: size in bytes */
{ {
return((buf_pool->curr_size) * UNIV_PAGE_SIZE); return((buf_pool->n_frames) * UNIV_PAGE_SIZE);
} }
/************************************************************************* /*************************************************************************
Gets the maximum size of buffer buf_pool in bytes. */ Gets the maximum size of buffer buf_pool in bytes. In the case of AWE, the
size of AWE window (= the frames). */
UNIV_INLINE UNIV_INLINE
ulint ulint
buf_pool_get_max_size(void) buf_pool_get_max_size(void)
/*=======================*/ /*=======================*/
/* out: size in bytes */ /* out: size in bytes */
{ {
return((buf_pool->max_size) * UNIV_PAGE_SIZE); return((buf_pool->n_frames) * UNIV_PAGE_SIZE);
} }
/*********************************************************************** /***********************************************************************
...@@ -207,54 +209,24 @@ buf_block_align( ...@@ -207,54 +209,24 @@ buf_block_align(
frame_zero = buf_pool->frame_zero; frame_zero = buf_pool->frame_zero;
ut_ad((ulint)ptr >= (ulint)frame_zero); if ((ulint)ptr < (ulint)frame_zero
|| (ulint)ptr > (ulint)(buf_pool->high_end)) {
block = buf_pool_get_nth_block(buf_pool, ((ulint)(ptr - frame_zero))
>> UNIV_PAGE_SIZE_SHIFT);
if (block < buf_pool->blocks
|| block >= buf_pool->blocks + buf_pool->max_size) {
ut_print_timestamp(stderr);
fprintf(stderr, fprintf(stderr,
"InnoDB: Error: trying to access a stray pointer %lx\n" " InnoDB: Error: trying to access a stray pointer %lx\n"
"InnoDB: buf pool start is at %lx, number of pages %lu\n", (ulint)ptr, "InnoDB: buf pool start is at %lx, end at %lx\n"
(ulint)frame_zero, buf_pool->max_size); "InnoDB: Probable reason is database corruption or memory\n"
"InnoDB: corruption. If this happens in an InnoDB database recovery,\n"
"InnoDB: you can look from section 6.1 at http://www.innodb.com/ibman.html\n"
"InnoDB: how to force recovery.\n",
(ulint)ptr, (ulint)frame_zero,
(ulint)(buf_pool->high_end));
ut_a(0); ut_a(0);
} }
return(block); block = *(buf_pool->blocks_of_frames + (((ulint)(ptr - frame_zero))
} >> UNIV_PAGE_SIZE_SHIFT));
/***********************************************************************
Gets the block to whose frame the pointer is pointing to. Does not
require a file page to be bufferfixed. */
UNIV_INLINE
buf_block_t*
buf_block_align_low(
/*================*/
/* out: pointer to block */
byte* ptr) /* in: pointer to a frame */
{
buf_block_t* block;
buf_frame_t* frame_zero;
ut_ad(ptr);
frame_zero = buf_pool->frame_zero;
ut_ad((ulint)ptr >= (ulint)frame_zero);
block = buf_pool_get_nth_block(buf_pool, ((ulint)(ptr - frame_zero))
>> UNIV_PAGE_SIZE_SHIFT);
if (block < buf_pool->blocks
|| block >= buf_pool->blocks + buf_pool->max_size) {
fprintf(stderr,
"InnoDB: Error: trying to access a stray pointer %lx\n"
"InnoDB: buf pool start is at %lx, number of pages %lu\n", (ulint)ptr,
(ulint)frame_zero, buf_pool->max_size);
ut_a(0);
}
return(block); return(block);
} }
...@@ -264,7 +236,7 @@ UNIV_INLINE ...@@ -264,7 +236,7 @@ UNIV_INLINE
buf_frame_t* buf_frame_t*
buf_frame_align( buf_frame_align(
/*============*/ /*============*/
/* out: pointer to block */ /* out: pointer to frame */
byte* ptr) /* in: pointer to a frame */ byte* ptr) /* in: pointer to a frame */
{ {
buf_frame_t* frame; buf_frame_t* frame;
...@@ -273,14 +245,19 @@ buf_frame_align( ...@@ -273,14 +245,19 @@ buf_frame_align(
frame = ut_align_down(ptr, UNIV_PAGE_SIZE); frame = ut_align_down(ptr, UNIV_PAGE_SIZE);
if (((ulint)frame if (((ulint)frame < (ulint)(buf_pool->frame_zero))
< (ulint)(buf_pool->frame_zero)) || (ulint)frame >= (ulint)(buf_pool->high_end)) {
|| ((ulint)frame > (ulint)(buf_pool_get_nth_block(buf_pool,
buf_pool->max_size - 1)->frame))) { ut_print_timestamp(stderr);
fprintf(stderr, fprintf(stderr,
"InnoDB: Error: trying to access a stray pointer %lx\n" " InnoDB: Error: trying to access a stray pointer %lx\n"
"InnoDB: buf pool start is at %lx, number of pages %lu\n", (ulint)ptr, "InnoDB: buf pool start is at %lx, end at %lx\n"
(ulint)(buf_pool->frame_zero), buf_pool->max_size); "InnoDB: Probable reason is database corruption or memory\n"
"InnoDB: corruption. If this happens in an InnoDB database recovery,\n"
"InnoDB: you can look from section 6.1 at http://www.innodb.com/ibman.html\n"
"InnoDB: how to force recovery.\n",
(ulint)ptr, (ulint)(buf_pool->frame_zero),
(ulint)(buf_pool->high_end));
ut_a(0); ut_a(0);
} }
...@@ -469,7 +446,7 @@ buf_frame_modify_clock_inc( ...@@ -469,7 +446,7 @@ buf_frame_modify_clock_inc(
ut_ad(frame); ut_ad(frame);
block = buf_block_align_low(frame); block = buf_block_align(frame);
ut_ad((mutex_own(&(buf_pool->mutex)) && (block->buf_fix_count == 0)) ut_ad((mutex_own(&(buf_pool->mutex)) && (block->buf_fix_count == 0))
|| rw_lock_own(&(block->lock), RW_LOCK_EXCLUSIVE)); || rw_lock_own(&(block->lock), RW_LOCK_EXCLUSIVE));
...@@ -479,6 +456,25 @@ buf_frame_modify_clock_inc( ...@@ -479,6 +456,25 @@ buf_frame_modify_clock_inc(
return(block->modify_clock); return(block->modify_clock);
} }
/************************************************************************
Increments the modify clock of a block by 1. The caller must either (1) own
the buf_pool mutex while the block's bufferfix count is zero, or (2) own an
x-lock on the block. */
UNIV_INLINE
dulint
buf_block_modify_clock_inc(
/*=======================*/
/* out: new value of the block's modify clock */
buf_block_t* block) /* in: block whose modify clock is incremented */
{
/* Check the latching precondition described in the header comment:
either the pool mutex is held and the block is not bufferfixed, or the
block's lock is held in exclusive mode. */
ut_ad((mutex_own(&(buf_pool->mutex)) && (block->buf_fix_count == 0))
|| rw_lock_own(&(block->lock), RW_LOCK_EXCLUSIVE));
/* The clock is returned as a dulint, so it is advanced with the dulint
increment macro rather than with ++. */
UT_DULINT_INC(block->modify_clock);
return(block->modify_clock);
}
/************************************************************************ /************************************************************************
Returns the value of the modify clock. The caller must have an s-lock Returns the value of the modify clock. The caller must have an s-lock
or x-lock on the block. */ or x-lock on the block. */
...@@ -508,15 +504,16 @@ void ...@@ -508,15 +504,16 @@ void
buf_block_buf_fix_inc_debug( buf_block_buf_fix_inc_debug(
/*========================*/ /*========================*/
buf_block_t* block, /* in: block to bufferfix */ buf_block_t* block, /* in: block to bufferfix */
char* file, /* in: file name */ char* file __attribute__ ((unused)), /* in: file name */
ulint line) /* in: line */ ulint line __attribute__ ((unused))) /* in: line */
{ {
#ifdef UNIV_SYNC_DEBUG
ibool ret; ibool ret;
ret = rw_lock_s_lock_func_nowait(&(block->debug_latch), file, line); ret = rw_lock_s_lock_func_nowait(&(block->debug_latch), file, line);
ut_ad(ret == TRUE); ut_ad(ret == TRUE);
#endif
block->buf_fix_count++; block->buf_fix_count++;
} }
......
...@@ -53,7 +53,9 @@ LRU list to the free list. */ ...@@ -53,7 +53,9 @@ LRU list to the free list. */
buf_block_t* buf_block_t*
buf_LRU_get_free_block(void); buf_LRU_get_free_block(void);
/*=========================*/ /*=========================*/
/* out: the free control block */ /* out: the free control block; also if AWE is
used, it is guaranteed that the block has its
page mapped to a frame when we return */
/********************************************************************** /**********************************************************************
Puts a block back to the free list. */ Puts a block back to the free list. */
......
...@@ -355,12 +355,7 @@ in the debug version: spaces with an odd number as the id are replicate ...@@ -355,12 +355,7 @@ in the debug version: spaces with an odd number as the id are replicate
spaces */ spaces */
#define RECV_REPLICA_SPACE_ADD 1 #define RECV_REPLICA_SPACE_ADD 1
/* This many blocks must be left free in the buffer pool when we scan extern ulint recv_n_pool_free_frames;
the log and store the scanned log records in the buffer pool: we will
use these free blocks to read in pages when we start applying the
log records to the database. */
#define RECV_POOL_N_FREE_BLOCKS (ut_min(256, buf_pool_get_curr_size() / 8))
#ifndef UNIV_NONINL #ifndef UNIV_NONINL
#include "log0recv.ic" #include "log0recv.ic"
......
...@@ -15,6 +15,76 @@ Created 9/30/1995 Heikki Tuuri ...@@ -15,6 +15,76 @@ Created 9/30/1995 Heikki Tuuri
typedef void* os_process_t; typedef void* os_process_t;
typedef unsigned long int os_process_id_t; typedef unsigned long int os_process_id_t;
/* The cell type in os_awe_allocate_mem page info */
#ifdef __NT__
typedef ULONG_PTR os_awe_t;
#else
typedef ulint os_awe_t;
#endif
/* Physical page size when Windows AWE is used. This is the normal
page size of an Intel x86 processor. We cannot use AWE with 2 MB or 4 MB
pages. */
#define OS_AWE_X86_PAGE_SIZE 4096
/********************************************************************
Windows AWE support. Tries to enable the "lock pages in memory" privilege for
the current process so that the current process can allocate memory-locked
virtual address space to act as the window where AWE maps physical memory. */
ibool
os_awe_enable_lock_pages_in_mem(void);
/*=================================*/
/* out: TRUE if success, FALSE if error;
prints error info to stderr if no success */
/********************************************************************
Allocates physical RAM memory up to 64 GB in an Intel 32-bit x86
processor. */
ibool
os_awe_allocate_physical_mem(
/*=========================*/
/* out: TRUE if success */
os_awe_t** page_info, /* out, own: array of opaque data containing
the info for allocated physical memory pages;
each allocated 4 kB physical memory page has
one slot of type os_awe_t in the array */
ulint n_megabytes); /* in: number of megabytes to allocate */
/********************************************************************
Allocates a window in the virtual address space where we can map the
pages of physical memory. */
byte*
os_awe_allocate_virtual_mem_window(
/*===============================*/
/* out, own: allocated memory, or NULL if did not
succeed */
ulint size); /* in: virtual memory allocation size in bytes, must
be < 2 GB */
/********************************************************************
With this function you can map parts of physical memory allocated with
the ..._allocate_physical_mem to the virtual address space allocated with
the previous function. Intel implements this so that the process page
tables are updated accordingly. A test on a 1.5 GHz AMD processor and XP
showed that this takes < 1 microsecond, much better than the estimated 80 us
for copying a 16 kB page memory to memory. But, the operation will at least
partially invalidate the translation lookaside buffer (TLB) of all
processors. Under a real-world load the performance hit may be bigger. */
ibool
os_awe_map_physical_mem_to_window(
/*==============================*/
/* out: TRUE if success; the function
calls exit(1) in case of an error */
byte* ptr, /* in: a page-aligned pointer to
somewhere in the virtual address
space window; we map the physical mem
pages here */
ulint n_mem_pages, /* in: number of 4 kB mem pages to
map */
os_awe_t* page_info); /* in: array of page infos for those
pages; each page has one slot in the
array */
/******************************************************************** /********************************************************************
Converts the current process id to a number. It is not guaranteed that the Converts the current process id to a number. It is not guaranteed that the
number is unique. In Linux returns the 'process number' of the current number is unique. In Linux returns the 'process number' of the current
......
...@@ -61,6 +61,7 @@ extern ulint srv_flush_log_at_trx_commit; ...@@ -61,6 +61,7 @@ extern ulint srv_flush_log_at_trx_commit;
extern byte srv_latin1_ordering[256];/* The sort order table of the latin1 extern byte srv_latin1_ordering[256];/* The sort order table of the latin1
character set */ character set */
extern ulint srv_pool_size; extern ulint srv_pool_size;
extern ulint srv_awe_window_size;
extern ulint srv_mem_pool_size; extern ulint srv_mem_pool_size;
extern ulint srv_lock_table_size; extern ulint srv_lock_table_size;
...@@ -86,6 +87,8 @@ extern ibool srv_use_doublewrite_buf; ...@@ -86,6 +87,8 @@ extern ibool srv_use_doublewrite_buf;
extern ibool srv_set_thread_priorities; extern ibool srv_set_thread_priorities;
extern int srv_query_thread_priority; extern int srv_query_thread_priority;
extern ibool srv_use_awe;
extern ibool srv_use_adaptive_hash_indexes;
/*-------------------------------------------*/ /*-------------------------------------------*/
extern ulint srv_n_rows_inserted; extern ulint srv_n_rows_inserted;
......
...@@ -437,25 +437,29 @@ log_group_calc_lsn_offset( ...@@ -437,25 +437,29 @@ log_group_calc_lsn_offset(
dulint lsn, /* in: lsn, must be within 4 GB of group->lsn */ dulint lsn, /* in: lsn, must be within 4 GB of group->lsn */
log_group_t* group) /* in: log group */ log_group_t* group) /* in: log group */
{ {
dulint gr_lsn; dulint gr_lsn;
ulint gr_lsn_size_offset; ib_longlong gr_lsn_size_offset;
ulint difference; ib_longlong difference;
ulint group_size; ib_longlong group_size;
ulint offset; ib_longlong offset;
ut_ad(mutex_own(&(log_sys->mutex))); ut_ad(mutex_own(&(log_sys->mutex)));
/* If total log file size is > 2 GB we can easily get overflows
with 32-bit integers. Use 64-bit integers instead. */
gr_lsn = group->lsn; gr_lsn = group->lsn;
gr_lsn_size_offset = log_group_calc_size_offset(group->lsn_offset, gr_lsn_size_offset = (ib_longlong)
group); log_group_calc_size_offset(group->lsn_offset, group);
group_size = log_group_get_capacity(group);
group_size = (ib_longlong) log_group_get_capacity(group);
if (ut_dulint_cmp(lsn, gr_lsn) >= 0) { if (ut_dulint_cmp(lsn, gr_lsn) >= 0) {
difference = ut_dulint_minus(lsn, gr_lsn); difference = (ib_longlong) ut_dulint_minus(lsn, gr_lsn);
} else { } else {
difference = ut_dulint_minus(gr_lsn, lsn); difference = (ib_longlong) ut_dulint_minus(gr_lsn, lsn);
difference = difference % group_size; difference = difference % group_size;
...@@ -464,7 +468,13 @@ log_group_calc_lsn_offset( ...@@ -464,7 +468,13 @@ log_group_calc_lsn_offset(
offset = (gr_lsn_size_offset + difference) % group_size; offset = (gr_lsn_size_offset + difference) % group_size;
return(log_group_calc_real_offset(offset, group)); ut_a(offset <= 0xFFFFFFFF);
/* printf("Offset is %lu gr_lsn_offset is %lu difference is %lu\n",
(ulint)offset,(ulint)gr_lsn_size_offset, (ulint)difference);
*/
return(log_group_calc_real_offset((ulint)offset, group));
} }
/*********************************************************************** /***********************************************************************
...@@ -3054,8 +3064,8 @@ log_check_log_recs( ...@@ -3054,8 +3064,8 @@ log_check_log_recs(
ut_memcpy(scan_buf, start, end - start); ut_memcpy(scan_buf, start, end - start);
recv_scan_log_recs(TRUE, recv_scan_log_recs(TRUE,
buf_pool_get_curr_size() - (buf_pool->n_frames -
RECV_POOL_N_FREE_BLOCKS * UNIV_PAGE_SIZE, recv_n_pool_free_frames) * UNIV_PAGE_SIZE,
FALSE, scan_buf, end - start, FALSE, scan_buf, end - start,
ut_dulint_align_down(buf_start_lsn, ut_dulint_align_down(buf_start_lsn,
OS_FILE_LOG_BLOCK_SIZE), OS_FILE_LOG_BLOCK_SIZE),
......
...@@ -71,6 +71,14 @@ ulint recv_previous_parsed_rec_is_multi = 0; ...@@ -71,6 +71,14 @@ ulint recv_previous_parsed_rec_is_multi = 0;
ulint recv_max_parsed_page_no = 0; ulint recv_max_parsed_page_no = 0;
/* This many frames must be left free in the buffer pool when we scan
the log and store the scanned log records in the buffer pool: we will
use these free frames to read in pages when we start applying the
log records to the database. */
ulint recv_n_pool_free_frames = 256;
/************************************************************ /************************************************************
Creates the recovery system. */ Creates the recovery system. */
...@@ -1018,10 +1026,10 @@ recv_recover_page( ...@@ -1018,10 +1026,10 @@ recv_recover_page(
block = buf_block_align(page); block = buf_block_align(page);
if (just_read_in) { if (just_read_in) {
/* Move the ownership of the x-latch on the page to this OS /* Move the ownership of the x-latch on the page to
thread, so that we can acquire a second x-latch on it. This this OS thread, so that we can acquire a second
is needed for the operations to the page to pass the debug x-latch on it. This is needed for the operations to
checks. */ the page to pass the debug checks. */
rw_lock_x_lock_move_ownership(&(block->lock)); rw_lock_x_lock_move_ownership(&(block->lock));
} }
...@@ -2362,8 +2370,8 @@ recv_group_scan_log_recs( ...@@ -2362,8 +2370,8 @@ recv_group_scan_log_recs(
group, start_lsn, end_lsn); group, start_lsn, end_lsn);
finished = recv_scan_log_recs(TRUE, finished = recv_scan_log_recs(TRUE,
buf_pool_get_curr_size() (buf_pool->n_frames
- RECV_POOL_N_FREE_BLOCKS * UNIV_PAGE_SIZE, - recv_n_pool_free_frames) * UNIV_PAGE_SIZE,
TRUE, log_sys->buf, TRUE, log_sys->buf,
RECV_SCAN_SIZE, start_lsn, RECV_SCAN_SIZE, start_lsn,
contiguous_lsn, group_scanned_lsn); contiguous_lsn, group_scanned_lsn);
...@@ -3001,8 +3009,8 @@ ask_again: ...@@ -3001,8 +3009,8 @@ ask_again:
read_offset % UNIV_PAGE_SIZE, len, buf, NULL); read_offset % UNIV_PAGE_SIZE, len, buf, NULL);
ret = recv_scan_log_recs(TRUE, ret = recv_scan_log_recs(TRUE,
buf_pool_get_curr_size() - (buf_pool->n_frames -
RECV_POOL_N_FREE_BLOCKS * UNIV_PAGE_SIZE, recv_n_pool_free_frames) * UNIV_PAGE_SIZE,
TRUE, buf, len, start_lsn, TRUE, buf, len, start_lsn,
&dummy_lsn, &scanned_lsn); &dummy_lsn, &scanned_lsn);
......
...@@ -2127,7 +2127,7 @@ os_aio_simulated_handle( ...@@ -2127,7 +2127,7 @@ os_aio_simulated_handle(
ulint offs; ulint offs;
ulint lowest_offset; ulint lowest_offset;
byte* combined_buf; byte* combined_buf;
byte* combined_buf2= 0; /* Remove warning */ byte* combined_buf2;
ibool ret; ibool ret;
ulint n; ulint n;
ulint i; ulint i;
......
This diff is collapsed.
...@@ -140,9 +140,14 @@ byte srv_latin1_ordering[256] /* The sort order table of the latin1 ...@@ -140,9 +140,14 @@ byte srv_latin1_ordering[256] /* The sort order table of the latin1
, 0xD8, 0x55, 0x55, 0x55, 0x59, 0x59, 0xDE, 0xFF , 0xD8, 0x55, 0x55, 0x55, 0x59, 0x59, 0xDE, 0xFF
}; };
ulint srv_pool_size = ULINT_MAX; /* size in database pages; ulint srv_pool_size = ULINT_MAX; /* size in pages; MySQL inits
MySQL originally sets this this to size in kilobytes but
value in megabytes */ we normalize this to pages in
srv_boot() */
ulint srv_awe_window_size = 0; /* size in pages; MySQL inits
this to bytes, but we
normalize it to pages in
srv_boot() */
ulint srv_mem_pool_size = ULINT_MAX; /* size in bytes */ ulint srv_mem_pool_size = ULINT_MAX; /* size in bytes */
ulint srv_lock_table_size = ULINT_MAX; ulint srv_lock_table_size = ULINT_MAX;
...@@ -218,6 +223,13 @@ ibool srv_use_doublewrite_buf = TRUE; ...@@ -218,6 +223,13 @@ ibool srv_use_doublewrite_buf = TRUE;
ibool srv_set_thread_priorities = TRUE; ibool srv_set_thread_priorities = TRUE;
int srv_query_thread_priority = 0; int srv_query_thread_priority = 0;
/* TRUE if the Address Windowing Extensions of Windows are used; then we must
disable adaptive hash indexes */
ibool srv_use_awe = FALSE;
ibool srv_use_adaptive_hash_indexes = TRUE;
/*-------------------------------------------*/ /*-------------------------------------------*/
ulint srv_n_spin_wait_rounds = 20; ulint srv_n_spin_wait_rounds = 20;
ulint srv_spin_wait_delay = 5; ulint srv_spin_wait_delay = 5;
...@@ -1956,9 +1968,19 @@ srv_normalize_init_values(void) ...@@ -1956,9 +1968,19 @@ srv_normalize_init_values(void)
srv_log_buffer_size = srv_log_buffer_size / UNIV_PAGE_SIZE; srv_log_buffer_size = srv_log_buffer_size / UNIV_PAGE_SIZE;
srv_pool_size = srv_pool_size / UNIV_PAGE_SIZE; srv_pool_size = srv_pool_size / (UNIV_PAGE_SIZE / 1024);
srv_awe_window_size = srv_awe_window_size / UNIV_PAGE_SIZE;
srv_lock_table_size = 20 * srv_pool_size; if (srv_use_awe) {
/* If we are using AWE we must save memory in the 32-bit
address space of the process, and cannot bind the lock
table size to the real buffer pool size. */
srv_lock_table_size = 20 * srv_awe_window_size;
} else {
srv_lock_table_size = 20 * srv_pool_size;
}
return(DB_SUCCESS); return(DB_SUCCESS);
} }
...@@ -2323,6 +2345,12 @@ srv_sprintf_innodb_monitor( ...@@ -2323,6 +2345,12 @@ srv_sprintf_innodb_monitor(
"Total memory allocated %lu; in additional pool allocated %lu\n", "Total memory allocated %lu; in additional pool allocated %lu\n",
ut_total_allocated_memory, ut_total_allocated_memory,
mem_pool_get_reserved(mem_comm_pool)); mem_pool_get_reserved(mem_comm_pool));
if (srv_use_awe) {
buf += sprintf(buf,
"In addition to that %lu MB of AWE memory allocated\n",
srv_pool_size / ((1024 * 1024) / UNIV_PAGE_SIZE));
}
buf_print_io(buf, buf_end); buf_print_io(buf, buf_end);
buf = buf + strlen(buf); buf = buf + strlen(buf);
ut_a(buf < buf_end + 1500); ut_a(buf < buf_end + 1500);
......
...@@ -935,6 +935,7 @@ innobase_start_or_create_for_mysql(void) ...@@ -935,6 +935,7 @@ innobase_start_or_create_for_mysql(void)
/*====================================*/ /*====================================*/
/* out: DB_SUCCESS or error code */ /* out: DB_SUCCESS or error code */
{ {
buf_pool_t* ret;
ibool create_new_db; ibool create_new_db;
ibool log_file_created; ibool log_file_created;
ibool log_created = FALSE; ibool log_created = FALSE;
...@@ -970,6 +971,11 @@ innobase_start_or_create_for_mysql(void) ...@@ -970,6 +971,11 @@ innobase_start_or_create_for_mysql(void)
#ifdef UNIV_MEM_DEBUG #ifdef UNIV_MEM_DEBUG
fprintf(stderr, fprintf(stderr,
"InnoDB: !!!!!!!!!!!!!! UNIV_MEM_DEBUG switched on !!!!!!!!!!!!!!!\n"); "InnoDB: !!!!!!!!!!!!!! UNIV_MEM_DEBUG switched on !!!!!!!!!!!!!!!\n");
#endif
#ifdef UNIV_SIMULATE_AWE
fprintf(stderr,
"InnoDB: !!!!!!!!!!!!!! UNIV_SIMULATE_AWE switched on !!!!!!!!!!!!!!!!!\n");
#endif #endif
if (srv_sizeof_trx_t_in_ha_innodb_cc != (ulint)sizeof(trx_t)) { if (srv_sizeof_trx_t_in_ha_innodb_cc != (ulint)sizeof(trx_t)) {
...@@ -1002,6 +1008,17 @@ innobase_start_or_create_for_mysql(void) ...@@ -1002,6 +1008,17 @@ innobase_start_or_create_for_mysql(void)
srv_startup_is_before_trx_rollback_phase = TRUE; srv_startup_is_before_trx_rollback_phase = TRUE;
os_aio_use_native_aio = FALSE; os_aio_use_native_aio = FALSE;
#if !defined(__NT__) && !defined(UNIV_SIMULATE_AWE)
if (srv_use_awe) {
fprintf(stderr,
"InnoDB: Error: You have specified innodb_buffer_pool_awe_mem_mb\n"
"InnoDB: in my.cnf, but AWE can only be used in Windows 2000 and later.\n");
return(DB_ERROR);
}
#endif
#ifdef __WIN__ #ifdef __WIN__
if (os_get_os_version() == OS_WIN95 if (os_get_os_version() == OS_WIN95
|| os_get_os_version() == OS_WIN31 || os_get_os_version() == OS_WIN31
...@@ -1057,6 +1074,9 @@ innobase_start_or_create_for_mysql(void) ...@@ -1057,6 +1074,9 @@ innobase_start_or_create_for_mysql(void)
return(DB_ERROR); return(DB_ERROR);
} }
/* Note that the call srv_boot() also changes the values of
srv_pool_size etc. to the units used by InnoDB internally */
err = srv_boot(); err = srv_boot();
if (err != DB_SUCCESS) { if (err != DB_SUCCESS) {
...@@ -1088,7 +1108,26 @@ innobase_start_or_create_for_mysql(void) ...@@ -1088,7 +1108,26 @@ innobase_start_or_create_for_mysql(void)
fil_init(SRV_MAX_N_OPEN_FILES); fil_init(SRV_MAX_N_OPEN_FILES);
buf_pool_init(srv_pool_size, srv_pool_size); if (srv_use_awe) {
fprintf(stderr,
"InnoDB: Using AWE: Memory window is %lu MB and AWE memory is %lu MB\n",
srv_awe_window_size / ((1024 * 1024) / UNIV_PAGE_SIZE),
srv_pool_size / ((1024 * 1024) / UNIV_PAGE_SIZE));
/* We must disable adaptive hash indexes because they do not
tolerate remapping of pages in AWE */
srv_use_adaptive_hash_indexes = FALSE;
ret = buf_pool_init(srv_pool_size, srv_pool_size,
srv_awe_window_size);
} else {
ret = buf_pool_init(srv_pool_size, srv_pool_size,
srv_pool_size);
}
if (ret == NULL) {
return(DB_ERROR);
}
fsp_init(); fsp_init();
log_init(); log_init();
......
...@@ -472,9 +472,9 @@ trx_sys_update_mysql_binlog_offset( ...@@ -472,9 +472,9 @@ trx_sys_update_mysql_binlog_offset(
if (0 != ut_memcmp(sys_header + field + TRX_SYS_MYSQL_LOG_NAME, if (0 != ut_memcmp(sys_header + field + TRX_SYS_MYSQL_LOG_NAME,
file_name, 1 + ut_strlen(file_name))) { file_name, 1 + ut_strlen(file_name))) {
mlog_write_string((byte*) (sys_header + field mlog_write_string(sys_header + field
+ TRX_SYS_MYSQL_LOG_NAME), + TRX_SYS_MYSQL_LOG_NAME,
(byte*) file_name, 1 + ut_strlen(file_name), mtr); file_name, 1 + ut_strlen(file_name), mtr);
} }
if (mach_read_from_4(sys_header + field if (mach_read_from_4(sys_header + field
......
...@@ -99,7 +99,7 @@ trx_create( ...@@ -99,7 +99,7 @@ trx_create(
trx->mysql_log_file_name = NULL; trx->mysql_log_file_name = NULL;
trx->mysql_log_offset = 0; trx->mysql_log_offset = 0;
trx->mysql_master_log_file_name = (char*) ""; trx->mysql_master_log_file_name = "";
trx->mysql_master_log_pos = 0; trx->mysql_master_log_pos = 0;
trx->ignore_duplicates_in_insert = FALSE; trx->ignore_duplicates_in_insert = FALSE;
......
...@@ -197,6 +197,7 @@ ut_get_year_month_day( ...@@ -197,6 +197,7 @@ ut_get_year_month_day(
*month = (ulint)cal_tm.wMonth; *month = (ulint)cal_tm.wMonth;
*day = (ulint)cal_tm.wDay; *day = (ulint)cal_tm.wDay;
#else #else
struct tm cal_tm;
struct tm* cal_tm_ptr; struct tm* cal_tm_ptr;
time_t tm; time_t tm;
......
...@@ -82,7 +82,8 @@ are declared in mysqld.cc: */ ...@@ -82,7 +82,8 @@ are declared in mysqld.cc: */
long innobase_mirrored_log_groups, innobase_log_files_in_group, long innobase_mirrored_log_groups, innobase_log_files_in_group,
innobase_log_file_size, innobase_log_buffer_size, innobase_log_file_size, innobase_log_buffer_size,
innobase_buffer_pool_size, innobase_additional_mem_pool_size, innobase_buffer_pool_size, innobase_buffer_pool_awe_mem_mb,
innobase_additional_mem_pool_size,
innobase_file_io_threads, innobase_lock_wait_timeout, innobase_file_io_threads, innobase_lock_wait_timeout,
innobase_thread_concurrency, innobase_force_recovery; innobase_thread_concurrency, innobase_force_recovery;
...@@ -753,7 +754,25 @@ innobase_init(void) ...@@ -753,7 +754,25 @@ innobase_init(void)
srv_log_buffer_size = (ulint) innobase_log_buffer_size; srv_log_buffer_size = (ulint) innobase_log_buffer_size;
srv_flush_log_at_trx_commit = (ulint) innobase_flush_log_at_trx_commit; srv_flush_log_at_trx_commit = (ulint) innobase_flush_log_at_trx_commit;
srv_pool_size = (ulint) innobase_buffer_pool_size; /* We set srv_pool_size here in units of 1 kB. InnoDB internally
changes the value so that it becomes the number of database pages. */
if (innobase_buffer_pool_awe_mem_mb == 0) {
/* Careful here: we first convert the signed long int to ulint
and only after that divide */
srv_pool_size = ((ulint) innobase_buffer_pool_size) / 1024;
} else {
srv_use_awe = TRUE;
srv_pool_size = (ulint)
(1024 * innobase_buffer_pool_awe_mem_mb);
srv_awe_window_size = (ulint) innobase_buffer_pool_size;
/* Note that what the user specified as
innodb_buffer_pool_size is actually the AWE memory window
size in this case, and the real buffer pool size is
determined by .._awe_mem_mb. */
}
srv_mem_pool_size = (ulint) innobase_additional_mem_pool_size; srv_mem_pool_size = (ulint) innobase_additional_mem_pool_size;
......
...@@ -178,7 +178,8 @@ extern char *innobase_home, *innobase_tmpdir, *innobase_logdir; ...@@ -178,7 +178,8 @@ extern char *innobase_home, *innobase_tmpdir, *innobase_logdir;
extern long innobase_lock_scan_time; extern long innobase_lock_scan_time;
extern long innobase_mirrored_log_groups, innobase_log_files_in_group; extern long innobase_mirrored_log_groups, innobase_log_files_in_group;
extern long innobase_log_file_size, innobase_log_buffer_size; extern long innobase_log_file_size, innobase_log_buffer_size;
extern long innobase_buffer_pool_size, innobase_additional_mem_pool_size; extern long innobase_buffer_pool_size, innobase_buffer_pool_awe_mem_mb,
innobase_additional_mem_pool_size;
extern long innobase_file_io_threads, innobase_lock_wait_timeout; extern long innobase_file_io_threads, innobase_lock_wait_timeout;
extern long innobase_force_recovery, innobase_thread_concurrency; extern long innobase_force_recovery, innobase_thread_concurrency;
extern char *innobase_data_home_dir, *innobase_data_file_path; extern char *innobase_data_home_dir, *innobase_data_file_path;
......
...@@ -3194,6 +3194,7 @@ enum options { ...@@ -3194,6 +3194,7 @@ enum options {
OPT_INNODB_LOG_FILE_SIZE, OPT_INNODB_LOG_FILE_SIZE,
OPT_INNODB_LOG_BUFFER_SIZE, OPT_INNODB_LOG_BUFFER_SIZE,
OPT_INNODB_BUFFER_POOL_SIZE, OPT_INNODB_BUFFER_POOL_SIZE,
OPT_INNODB_BUFFER_POOL_AWE_MEM_MB,
OPT_INNODB_ADDITIONAL_MEM_POOL_SIZE, OPT_INNODB_ADDITIONAL_MEM_POOL_SIZE,
OPT_INNODB_FILE_IO_THREADS, OPT_INNODB_FILE_IO_THREADS,
OPT_INNODB_LOCK_WAIT_TIMEOUT, OPT_INNODB_LOCK_WAIT_TIMEOUT,
...@@ -3753,6 +3754,10 @@ struct my_option my_long_options[] = ...@@ -3753,6 +3754,10 @@ struct my_option my_long_options[] =
"The size of the memory buffer InnoDB uses to cache data and indexes of its tables.", "The size of the memory buffer InnoDB uses to cache data and indexes of its tables.",
(gptr*) &innobase_buffer_pool_size, (gptr*) &innobase_buffer_pool_size, 0, (gptr*) &innobase_buffer_pool_size, (gptr*) &innobase_buffer_pool_size, 0,
GET_LONG, REQUIRED_ARG, 8*1024*1024L, 1024*1024L, ~0L, 0, 1024*1024L, 0}, GET_LONG, REQUIRED_ARG, 8*1024*1024L, 1024*1024L, ~0L, 0, 1024*1024L, 0},
{"innodb_buffer_pool_awe_mem_mb", OPT_INNODB_BUFFER_POOL_AWE_MEM_MB,
"If Windows AWE is used, the size of InnoDB buffer pool allocated from the AWE memory.",
(gptr*) &innobase_buffer_pool_awe_mem_mb, (gptr*) &innobase_buffer_pool_awe_mem_mb, 0,
GET_LONG, REQUIRED_ARG, 0, 0, 63000, 0, 1, 0},
{"innodb_additional_mem_pool_size", OPT_INNODB_ADDITIONAL_MEM_POOL_SIZE, {"innodb_additional_mem_pool_size", OPT_INNODB_ADDITIONAL_MEM_POOL_SIZE,
"Size of a memory pool InnoDB uses to store data dictionary information and other internal data structures.", "Size of a memory pool InnoDB uses to store data dictionary information and other internal data structures.",
(gptr*) &innobase_additional_mem_pool_size, (gptr*) &innobase_additional_mem_pool_size,
......
...@@ -449,6 +449,7 @@ struct show_var_st init_vars[]= { ...@@ -449,6 +449,7 @@ struct show_var_st init_vars[]= {
#ifdef HAVE_INNOBASE_DB #ifdef HAVE_INNOBASE_DB
{"innodb_additional_mem_pool_size", (char*) &innobase_additional_mem_pool_size, SHOW_LONG }, {"innodb_additional_mem_pool_size", (char*) &innobase_additional_mem_pool_size, SHOW_LONG },
{"innodb_buffer_pool_size", (char*) &innobase_buffer_pool_size, SHOW_LONG }, {"innodb_buffer_pool_size", (char*) &innobase_buffer_pool_size, SHOW_LONG },
{"innodb_buffer_pool_awe_mem_mb", (char*) &innobase_buffer_pool_awe_mem_mb, SHOW_LONG },
{"innodb_data_file_path", (char*) &innobase_data_file_path, SHOW_CHAR_PTR}, {"innodb_data_file_path", (char*) &innobase_data_file_path, SHOW_CHAR_PTR},
{"innodb_data_home_dir", (char*) &innobase_data_home_dir, SHOW_CHAR_PTR}, {"innodb_data_home_dir", (char*) &innobase_data_home_dir, SHOW_CHAR_PTR},
{"innodb_file_io_threads", (char*) &innobase_file_io_threads, SHOW_LONG }, {"innodb_file_io_threads", (char*) &innobase_file_io_threads, SHOW_LONG },
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment