Commit 9497c997 authored by unknown

buf0buf.c, buf0buf.ic, buf0buf.h:

  Reduce memory usage of the buffer headers
Many files:
  Merge InnoDB-4.1 with AWE support


sql/mysqld.cc:
  Merge InnoDB-4.1 with AWE support
sql/set_var.cc:
  Merge InnoDB-4.1 with AWE support
sql/ha_innodb.h:
  Merge InnoDB-4.1 with AWE support
sql/ha_innodb.cc:
  Merge InnoDB-4.1 with AWE support
innobase/btr/btr0cur.c:
  Merge InnoDB-4.1 with AWE support
innobase/btr/btr0pcur.c:
  Merge InnoDB-4.1 with AWE support
innobase/buf/buf0flu.c:
  Merge InnoDB-4.1 with AWE support
innobase/buf/buf0lru.c:
  Merge InnoDB-4.1 with AWE support
innobase/buf/buf0rea.c:
  Merge InnoDB-4.1 with AWE support
innobase/include/btr0pcur.h:
  Merge InnoDB-4.1 with AWE support
innobase/include/buf0lru.h:
  Merge InnoDB-4.1 with AWE support
innobase/include/log0recv.h:
  Merge InnoDB-4.1 with AWE support
innobase/include/os0proc.h:
  Merge InnoDB-4.1 with AWE support
innobase/include/srv0srv.h:
  Merge InnoDB-4.1 with AWE support
innobase/log/log0log.c:
  Merge InnoDB-4.1 with AWE support
innobase/log/log0recv.c:
  Merge InnoDB-4.1 with AWE support
innobase/os/os0file.c:
  Merge InnoDB-4.1 with AWE support
innobase/os/os0proc.c:
  Merge InnoDB-4.1 with AWE support
innobase/srv/srv0srv.c:
  Merge InnoDB-4.1 with AWE support
innobase/srv/srv0start.c:
  Merge InnoDB-4.1 with AWE support
innobase/trx/trx0sys.c:
  Merge InnoDB-4.1 with AWE support
innobase/trx/trx0trx.c:
  Merge InnoDB-4.1 with AWE support
innobase/ut/ut0ut.c:
  Merge InnoDB-4.1 with AWE support
innobase/include/buf0buf.h:
  Reduce memory usage of the buffer headers
innobase/include/buf0buf.ic:
  Reduce memory usage of the buffer headers
innobase/buf/buf0buf.c:
  Reduce memory usage of the buffer headers
parent ef62b4c9
...@@ -291,6 +291,7 @@ btr_cur_search_to_nth_level( ...@@ -291,6 +291,7 @@ btr_cur_search_to_nth_level(
&& latch_mode <= BTR_MODIFY_LEAF && info->last_hash_succ && latch_mode <= BTR_MODIFY_LEAF && info->last_hash_succ
&& !estimate && !estimate
&& mode != PAGE_CUR_LE_OR_EXTENDS && mode != PAGE_CUR_LE_OR_EXTENDS
&& srv_use_adaptive_hash_indexes
&& btr_search_guess_on_hash(index, info, tuple, mode, && btr_search_guess_on_hash(index, info, tuple, mode,
latch_mode, cursor, latch_mode, cursor,
has_search_latch, mtr)) { has_search_latch, mtr)) {
...@@ -495,9 +496,11 @@ retry_page_get: ...@@ -495,9 +496,11 @@ retry_page_get:
cursor->up_bytes = up_bytes; cursor->up_bytes = up_bytes;
#ifdef BTR_CUR_ADAPT #ifdef BTR_CUR_ADAPT
if (srv_use_adaptive_hash_indexes) {
btr_search_info_update(index, cursor); btr_search_info_update(index, cursor);
}
#endif #endif
ut_ad(cursor->up_match != ULINT_UNDEFINED ut_ad(cursor->up_match != ULINT_UNDEFINED
|| mode != PAGE_CUR_GE); || mode != PAGE_CUR_GE);
ut_ad(cursor->up_match != ULINT_UNDEFINED ut_ad(cursor->up_match != ULINT_UNDEFINED
......
...@@ -95,7 +95,9 @@ btr_pcur_store_position( ...@@ -95,7 +95,9 @@ btr_pcur_store_position(
ut_a(cursor->latch_mode != BTR_NO_LATCHES); ut_a(cursor->latch_mode != BTR_NO_LATCHES);
if (page_get_n_recs(page) == 0) { if (page_get_n_recs(page) == 0) {
/* It must be an empty index tree */ /* It must be an empty index tree; NOTE that in this case
we do not store the modify_clock, but always do a search
if we restore the cursor position */
ut_a(btr_page_get_next(page, mtr) == FIL_NULL ut_a(btr_page_get_next(page, mtr) == FIL_NULL
&& btr_page_get_prev(page, mtr) == FIL_NULL); && btr_page_get_prev(page, mtr) == FIL_NULL);
...@@ -134,6 +136,7 @@ btr_pcur_store_position( ...@@ -134,6 +136,7 @@ btr_pcur_store_position(
&(cursor->old_rec_buf), &(cursor->old_rec_buf),
&(cursor->buf_size)); &(cursor->buf_size));
cursor->block_when_stored = buf_block_align(page);
cursor->modify_clock = buf_frame_get_modify_clock(page); cursor->modify_clock = buf_frame_get_modify_clock(page);
} }
...@@ -205,6 +208,9 @@ btr_pcur_restore_position( ...@@ -205,6 +208,9 @@ btr_pcur_restore_position(
if (cursor->rel_pos == BTR_PCUR_AFTER_LAST_IN_TREE if (cursor->rel_pos == BTR_PCUR_AFTER_LAST_IN_TREE
|| cursor->rel_pos == BTR_PCUR_BEFORE_FIRST_IN_TREE) { || cursor->rel_pos == BTR_PCUR_BEFORE_FIRST_IN_TREE) {
/* In these cases we do not try an optimistic restoration,
but always do a search */
if (cursor->rel_pos == BTR_PCUR_BEFORE_FIRST_IN_TREE) { if (cursor->rel_pos == BTR_PCUR_BEFORE_FIRST_IN_TREE) {
from_left = TRUE; from_left = TRUE;
} else { } else {
...@@ -214,6 +220,10 @@ btr_pcur_restore_position( ...@@ -214,6 +220,10 @@ btr_pcur_restore_position(
btr_cur_open_at_index_side(from_left, btr_cur_open_at_index_side(from_left,
btr_pcur_get_btr_cur(cursor)->index, latch_mode, btr_pcur_get_btr_cur(cursor)->index, latch_mode,
btr_pcur_get_btr_cur(cursor), mtr); btr_pcur_get_btr_cur(cursor), mtr);
cursor->block_when_stored =
buf_block_align(btr_pcur_get_page(cursor));
return(FALSE); return(FALSE);
} }
...@@ -224,7 +234,8 @@ btr_pcur_restore_position( ...@@ -224,7 +234,8 @@ btr_pcur_restore_position(
if (latch_mode == BTR_SEARCH_LEAF || latch_mode == BTR_MODIFY_LEAF) { if (latch_mode == BTR_SEARCH_LEAF || latch_mode == BTR_MODIFY_LEAF) {
/* Try optimistic restoration */ /* Try optimistic restoration */
if (buf_page_optimistic_get(latch_mode, page, if (buf_page_optimistic_get(latch_mode,
cursor->block_when_stored, page,
cursor->modify_clock, mtr)) { cursor->modify_clock, mtr)) {
cursor->pos_state = BTR_PCUR_IS_POSITIONED; cursor->pos_state = BTR_PCUR_IS_POSITIONED;
...@@ -271,8 +282,6 @@ btr_pcur_restore_position( ...@@ -271,8 +282,6 @@ btr_pcur_restore_position(
btr_pcur_open_with_no_init(btr_pcur_get_btr_cur(cursor)->index, tuple, btr_pcur_open_with_no_init(btr_pcur_get_btr_cur(cursor)->index, tuple,
mode, latch_mode, cursor, 0, mtr); mode, latch_mode, cursor, 0, mtr);
cursor->old_stored = BTR_PCUR_OLD_STORED;
/* Restore the old search mode */ /* Restore the old search mode */
cursor->search_mode = old_mode; cursor->search_mode = old_mode;
...@@ -281,10 +290,17 @@ btr_pcur_restore_position( ...@@ -281,10 +290,17 @@ btr_pcur_restore_position(
&& 0 == cmp_dtuple_rec(tuple, btr_pcur_get_rec(cursor))) { && 0 == cmp_dtuple_rec(tuple, btr_pcur_get_rec(cursor))) {
/* We have to store the NEW value for the modify clock, since /* We have to store the NEW value for the modify clock, since
the cursor can now be on a different page! */ the cursor can now be on a different page! But we can retain
the value of old_rec */
cursor->modify_clock =
buf_frame_get_modify_clock(btr_pcur_get_page(cursor));
cursor->block_when_stored =
buf_block_align(btr_pcur_get_page(cursor));
cursor->old_stored = BTR_PCUR_OLD_STORED;
cursor->modify_clock = buf_frame_get_modify_clock(
buf_frame_align(btr_pcur_get_rec(cursor)));
mem_heap_free(heap); mem_heap_free(heap);
return(TRUE); return(TRUE);
...@@ -292,6 +308,12 @@ btr_pcur_restore_position( ...@@ -292,6 +308,12 @@ btr_pcur_restore_position(
mem_heap_free(heap); mem_heap_free(heap);
/* We have to store new position information, modify_clock etc.,
to the cursor because it can now be on a different page, the record
under it may have been removed, etc. */
btr_pcur_store_position(cursor, mtr);
return(FALSE); return(FALSE);
} }
......
...@@ -196,7 +196,29 @@ If a new page is referenced in the buf_pool, and several pages ...@@ -196,7 +196,29 @@ If a new page is referenced in the buf_pool, and several pages
of its random access area (for instance, 32 consecutive pages of its random access area (for instance, 32 consecutive pages
in a tablespace) have recently been referenced, we may predict in a tablespace) have recently been referenced, we may predict
that the whole area may be needed in the near future, and issue that the whole area may be needed in the near future, and issue
the read requests for the whole area. */ the read requests for the whole area.
AWE implementation
------------------
By a 'block' we mean the buffer header of type buf_block_t. By a 'page'
we mean the physical 16 kB memory area allocated from RAM for that block.
By a 'frame' we mean a 16 kB area in the virtual address space of the
process, in the frame_mem of buf_pool.
We can map pages to the frames of the buffer pool.
1) A buffer block allocated to use as a non-data page, e.g., to the lock
table, is always mapped to a frame.
2) A bufferfixed or io-fixed data page is always mapped to a frame.
3) When we need to map a block to a frame, we scan the list
awe_LRU_free_mapped from its end and try to unmap the last block that
can be unmapped; note that bufferfixed or io-fixed pages cannot be unmapped.
4) For every frame in the buffer pool there is always a block whose page is
mapped to it. When we create the buffer pool, we map the first elements
in the free list to the frames.
5) When we have AWE enabled, we disable adaptive hash indexes.
*/
buf_pool_t* buf_pool = NULL; /* The buffer buf_pool of the database */ buf_pool_t* buf_pool = NULL; /* The buffer buf_pool of the database */
...@@ -346,12 +368,15 @@ void ...@@ -346,12 +368,15 @@ void
buf_block_init( buf_block_init(
/*===========*/ /*===========*/
buf_block_t* block, /* in: pointer to control block */ buf_block_t* block, /* in: pointer to control block */
byte* frame) /* in: pointer to buffer frame */ byte* frame) /* in: pointer to buffer frame, or NULL if in
the case of AWE there is no frame */
{ {
block->state = BUF_BLOCK_NOT_USED; block->state = BUF_BLOCK_NOT_USED;
block->frame = frame; block->frame = frame;
block->awe_info = NULL;
block->modify_clock = ut_dulint_zero; block->modify_clock = ut_dulint_zero;
block->file_page_was_freed = FALSE; block->file_page_was_freed = FALSE;
...@@ -364,29 +389,37 @@ buf_block_init( ...@@ -364,29 +389,37 @@ buf_block_init(
rw_lock_create(&(block->read_lock)); rw_lock_create(&(block->read_lock));
rw_lock_set_level(&(block->read_lock), SYNC_NO_ORDER_CHECK); rw_lock_set_level(&(block->read_lock), SYNC_NO_ORDER_CHECK);
#ifdef UNIV_SYNC_DEBUG
rw_lock_create(&(block->debug_latch)); rw_lock_create(&(block->debug_latch));
rw_lock_set_level(&(block->debug_latch), SYNC_NO_ORDER_CHECK); rw_lock_set_level(&(block->debug_latch), SYNC_NO_ORDER_CHECK);
#endif
} }
/************************************************************************ /************************************************************************
Creates a buffer buf_pool object. */ Creates the buffer pool. */
static
buf_pool_t* buf_pool_t*
buf_pool_create( buf_pool_init(
/*============*/ /*==========*/
/* out, own: buf_pool object, NULL if not /* out, own: buf_pool object, NULL if not
enough memory */ enough memory or error */
ulint max_size, /* in: maximum size of the buf_pool in ulint max_size, /* in: maximum size of the buf_pool in
blocks */ blocks */
ulint curr_size) /* in: current size to use, must be <= ulint curr_size, /* in: current size to use, must be <=
max_size, currently must be equal to max_size, currently must be equal to
max_size */ max_size */
ulint n_frames) /* in: number of frames; if AWE is used,
this is the size of the address space window
where physical memory pages are mapped; if
AWE is not used then this must be the same
as max_size */
{ {
byte* frame; byte* frame;
ulint i; ulint i;
buf_block_t* block; buf_block_t* block;
ut_a(max_size == curr_size); ut_a(max_size == curr_size);
ut_a(srv_use_awe || n_frames == max_size);
buf_pool = mem_alloc(sizeof(buf_pool_t)); buf_pool = mem_alloc(sizeof(buf_pool_t));
...@@ -397,7 +430,37 @@ buf_pool_create( ...@@ -397,7 +430,37 @@ buf_pool_create(
mutex_enter(&(buf_pool->mutex)); mutex_enter(&(buf_pool->mutex));
buf_pool->frame_mem = ut_malloc(UNIV_PAGE_SIZE * (max_size + 1)); if (srv_use_awe) {
/*----------------------------------------*/
/* Allocate the virtual address space window, i.e., the
buffer pool frames */
buf_pool->frame_mem = os_awe_allocate_virtual_mem_window(
UNIV_PAGE_SIZE * (n_frames + 1));
/* Allocate the physical memory for AWE and the AWE info array
for buf_pool */
if ((curr_size % ((1024 * 1024) / UNIV_PAGE_SIZE)) != 0) {
fprintf(stderr,
"InnoDB: AWE: Error: physical memory must be allocated in full megabytes.\n"
"InnoDB: Trying to allocate %lu database pages.\n",
curr_size);
return(NULL);
}
if (!os_awe_allocate_physical_mem(&(buf_pool->awe_info),
curr_size / ((1024 * 1024) / UNIV_PAGE_SIZE))) {
return(NULL);
}
/*----------------------------------------*/
} else {
buf_pool->frame_mem = ut_malloc(
UNIV_PAGE_SIZE * (n_frames + 1));
}
if (buf_pool->frame_mem == NULL) { if (buf_pool->frame_mem == NULL) {
...@@ -414,19 +477,58 @@ buf_pool_create( ...@@ -414,19 +477,58 @@ buf_pool_create(
buf_pool->max_size = max_size; buf_pool->max_size = max_size;
buf_pool->curr_size = curr_size; buf_pool->curr_size = curr_size;
buf_pool->n_frames = n_frames;
/* Align pointer to the first frame */ /* Align pointer to the first frame */
frame = ut_align(buf_pool->frame_mem, UNIV_PAGE_SIZE); frame = ut_align(buf_pool->frame_mem, UNIV_PAGE_SIZE);
buf_pool->frame_zero = frame;
buf_pool->frame_zero = frame;
buf_pool->high_end = frame + UNIV_PAGE_SIZE * curr_size; buf_pool->high_end = frame + UNIV_PAGE_SIZE * curr_size;
/* Init block structs and assign frames for them */ if (srv_use_awe) {
/*----------------------------------------*/
/* Map an initial part of the allocated physical memory to
the window */
os_awe_map_physical_mem_to_window(buf_pool->frame_zero,
n_frames *
(UNIV_PAGE_SIZE / OS_AWE_X86_PAGE_SIZE),
buf_pool->awe_info);
/*----------------------------------------*/
}
buf_pool->blocks_of_frames = ut_malloc(sizeof(void*) * n_frames);
if (buf_pool->blocks_of_frames == NULL) {
return(NULL);
}
/* Init block structs and assign frames for them; in the case of
AWE there are fewer frames than blocks. Then we assign the frames
to the first blocks (we already mapped the memory above). We also
init the awe_info for every block. */
for (i = 0; i < max_size; i++) { for (i = 0; i < max_size; i++) {
block = buf_pool_get_nth_block(buf_pool, i); block = buf_pool_get_nth_block(buf_pool, i);
if (i < n_frames) {
frame = buf_pool->frame_zero + i * UNIV_PAGE_SIZE;
*(buf_pool->blocks_of_frames + i) = block;
} else {
frame = NULL;
}
buf_block_init(block, frame); buf_block_init(block, frame);
frame = frame + UNIV_PAGE_SIZE;
if (srv_use_awe) {
/*----------------------------------------*/
block->awe_info = buf_pool->awe_info
+ i * (UNIV_PAGE_SIZE / OS_AWE_X86_PAGE_SIZE);
/*----------------------------------------*/
}
} }
buf_pool->page_hash = hash_create(2 * max_size); buf_pool->page_hash = hash_create(2 * max_size);
...@@ -438,12 +540,14 @@ buf_pool_create( ...@@ -438,12 +540,14 @@ buf_pool_create(
buf_pool->n_pages_read = 0; buf_pool->n_pages_read = 0;
buf_pool->n_pages_written = 0; buf_pool->n_pages_written = 0;
buf_pool->n_pages_created = 0; buf_pool->n_pages_created = 0;
buf_pool->n_pages_awe_remapped = 0;
buf_pool->n_page_gets = 0; buf_pool->n_page_gets = 0;
buf_pool->n_page_gets_old = 0; buf_pool->n_page_gets_old = 0;
buf_pool->n_pages_read_old = 0; buf_pool->n_pages_read_old = 0;
buf_pool->n_pages_written_old = 0; buf_pool->n_pages_written_old = 0;
buf_pool->n_pages_created_old = 0; buf_pool->n_pages_created_old = 0;
buf_pool->n_pages_awe_remapped_old = 0;
/* 2. Initialize flushing fields /* 2. Initialize flushing fields
---------------------------- */ ---------------------------- */
...@@ -466,40 +570,120 @@ buf_pool_create( ...@@ -466,40 +570,120 @@ buf_pool_create(
buf_pool->LRU_old = NULL; buf_pool->LRU_old = NULL;
UT_LIST_INIT(buf_pool->awe_LRU_free_mapped);
/* Add control blocks to the free list */ /* Add control blocks to the free list */
UT_LIST_INIT(buf_pool->free); UT_LIST_INIT(buf_pool->free);
for (i = 0; i < curr_size; i++) { for (i = 0; i < curr_size; i++) {
block = buf_pool_get_nth_block(buf_pool, i); block = buf_pool_get_nth_block(buf_pool, i);
/* Wipe contents of page to eliminate a Purify warning */ if (block->frame) {
/* Wipe contents of frame to eliminate a Purify
warning */
memset(block->frame, '\0', UNIV_PAGE_SIZE); memset(block->frame, '\0', UNIV_PAGE_SIZE);
UT_LIST_ADD_FIRST(free, buf_pool->free, block); if (srv_use_awe) {
/* Add to the list of blocks mapped to
frames */
UT_LIST_ADD_LAST(awe_LRU_free_mapped,
buf_pool->awe_LRU_free_mapped, block);
}
}
UT_LIST_ADD_LAST(free, buf_pool->free, block);
} }
mutex_exit(&(buf_pool->mutex)); mutex_exit(&(buf_pool->mutex));
btr_search_sys_create(curr_size * UNIV_PAGE_SIZE / sizeof(void*) / 64); if (srv_use_adaptive_hash_indexes) {
btr_search_sys_create(
curr_size * UNIV_PAGE_SIZE / sizeof(void*) / 64);
} else {
/* Create only a small dummy system */
btr_search_sys_create(1000);
}
return(buf_pool); return(buf_pool);
} }
/************************************************************************ /************************************************************************
Initializes the buffer buf_pool of the database. */ Maps the page of block to a frame, if not mapped yet. Unmaps some page
from the end of the awe_LRU_free_mapped. */
void void
buf_pool_init( buf_awe_map_page_to_frame(
/*==========*/ /*======================*/
ulint max_size, /* in: maximum size of the buf_pool in blocks */ buf_block_t* block, /* in: block whose page should be
ulint curr_size) /* in: current size to use, must be <= mapped to a frame */
max_size */ ibool add_to_mapped_list) /* in: TRUE if, in the case where
we need to map the page, we should also
add the block to the
awe_LRU_free_mapped list */
{ {
ut_a(buf_pool == NULL); buf_block_t* bck;
buf_pool_create(max_size, curr_size); ut_ad(mutex_own(&(buf_pool->mutex)));
ut_ad(block);
ut_ad(buf_validate()); if (block->frame) {
return;
}
/* Scan awe_LRU_free_mapped from the end and try to find a block
which is not bufferfixed or io-fixed */
bck = UT_LIST_GET_LAST(buf_pool->awe_LRU_free_mapped);
while (bck) {
if (bck->state == BUF_BLOCK_FILE_PAGE
&& (bck->buf_fix_count != 0 || bck->io_fix != 0)) {
/* We have to skip this */
bck = UT_LIST_GET_PREV(awe_LRU_free_mapped, bck);
} else {
/* We can map block to the frame of bck */
os_awe_map_physical_mem_to_window(
bck->frame,
UNIV_PAGE_SIZE / OS_AWE_X86_PAGE_SIZE,
block->awe_info);
block->frame = bck->frame;
*(buf_pool->blocks_of_frames
+ (((ulint)(block->frame
- buf_pool->frame_zero))
>> UNIV_PAGE_SIZE_SHIFT))
= block;
bck->frame = NULL;
UT_LIST_REMOVE(awe_LRU_free_mapped,
buf_pool->awe_LRU_free_mapped,
bck);
if (add_to_mapped_list) {
UT_LIST_ADD_FIRST(awe_LRU_free_mapped,
buf_pool->awe_LRU_free_mapped,
block);
}
buf_pool->n_pages_awe_remapped++;
return;
}
}
fprintf(stderr,
"InnoDB: AWE: Fatal error: cannot find a page to unmap\n"
"InnoDB: awe_LRU_free_mapped list length %lu\n",
UT_LIST_GET_LEN(buf_pool->awe_LRU_free_mapped));
ut_a(0);
} }
/************************************************************************ /************************************************************************
...@@ -508,7 +692,9 @@ UNIV_INLINE ...@@ -508,7 +692,9 @@ UNIV_INLINE
buf_block_t* buf_block_t*
buf_block_alloc(void) buf_block_alloc(void)
/*=================*/ /*=================*/
/* out, own: the allocated block */ /* out, own: the allocated block; also if AWE
is used it is guaranteed that the page is
mapped to a frame */
{ {
buf_block_t* block; buf_block_t* block;
...@@ -846,6 +1032,19 @@ loop: ...@@ -846,6 +1032,19 @@ loop:
} }
} }
/* If AWE is enabled and the page is not mapped to a frame, then
map it */
if (block->frame == NULL) {
ut_a(srv_use_awe);
/* We set second parameter TRUE because the block is in the
LRU list and we must put it to awe_LRU_free_mapped list once
mapped to a frame */
buf_awe_map_page_to_frame(block, TRUE);
}
#ifdef UNIV_SYNC_DEBUG #ifdef UNIV_SYNC_DEBUG
buf_block_buf_fix_inc_debug(block, file, line); buf_block_buf_fix_inc_debug(block, file, line);
#else #else
...@@ -940,28 +1139,27 @@ buf_page_optimistic_get_func( ...@@ -940,28 +1139,27 @@ buf_page_optimistic_get_func(
/*=========================*/ /*=========================*/
/* out: TRUE if success */ /* out: TRUE if success */
ulint rw_latch,/* in: RW_S_LATCH, RW_X_LATCH */ ulint rw_latch,/* in: RW_S_LATCH, RW_X_LATCH */
buf_frame_t* guess, /* in: guessed frame */ buf_block_t* block, /* in: guessed buffer block */
buf_frame_t* guess, /* in: guessed frame; note that AWE may move
frames */
dulint modify_clock,/* in: modify clock value if mode is dulint modify_clock,/* in: modify clock value if mode is
..._GUESS_ON_CLOCK */ ..._GUESS_ON_CLOCK */
char* file, /* in: file name */ char* file, /* in: file name */
ulint line, /* in: line where called */ ulint line, /* in: line where called */
mtr_t* mtr) /* in: mini-transaction */ mtr_t* mtr) /* in: mini-transaction */
{ {
buf_block_t* block;
ibool accessed; ibool accessed;
ibool success; ibool success;
ulint fix_type; ulint fix_type;
ut_ad(mtr && guess); ut_ad(mtr && block);
ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH)); ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));
buf_pool->n_page_gets++;
block = buf_block_align(guess);
mutex_enter(&(buf_pool->mutex)); mutex_enter(&(buf_pool->mutex));
if (block->state != BUF_BLOCK_FILE_PAGE) { /* If AWE is used, block may have a different frame now, e.g., NULL */
if (block->state != BUF_BLOCK_FILE_PAGE || block->frame != guess) {
mutex_exit(&(buf_pool->mutex)); mutex_exit(&(buf_pool->mutex));
...@@ -1054,12 +1252,15 @@ buf_page_optimistic_get_func( ...@@ -1054,12 +1252,15 @@ buf_page_optimistic_get_func(
#ifdef UNIV_IBUF_DEBUG #ifdef UNIV_IBUF_DEBUG
ut_a(ibuf_count_get(block->space, block->offset) == 0); ut_a(ibuf_count_get(block->space, block->offset) == 0);
#endif #endif
buf_pool->n_page_gets++;
return(TRUE); return(TRUE);
} }
/************************************************************************ /************************************************************************
This is used to get access to a known database page, when no waiting can be This is used to get access to a known database page, when no waiting can be
done. */ done. For example, if a search in an adaptive hash index leads us to this
frame. */
ibool ibool
buf_page_get_known_nowait( buf_page_get_known_nowait(
...@@ -1079,12 +1280,10 @@ buf_page_get_known_nowait( ...@@ -1079,12 +1280,10 @@ buf_page_get_known_nowait(
ut_ad(mtr); ut_ad(mtr);
ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH)); ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));
buf_pool->n_page_gets++; mutex_enter(&(buf_pool->mutex));
block = buf_block_align(guess); block = buf_block_align(guess);
mutex_enter(&(buf_pool->mutex));
if (block->state == BUF_BLOCK_REMOVE_HASH) { if (block->state == BUF_BLOCK_REMOVE_HASH) {
/* Another thread is just freeing the block from the LRU list /* Another thread is just freeing the block from the LRU list
of the buffer pool: do not try to access this page; this of the buffer pool: do not try to access this page; this
...@@ -1152,6 +1351,8 @@ buf_page_get_known_nowait( ...@@ -1152,6 +1351,8 @@ buf_page_get_known_nowait(
ut_a((mode == BUF_KEEP_OLD) ut_a((mode == BUF_KEEP_OLD)
|| (ibuf_count_get(block->space, block->offset) == 0)); || (ibuf_count_get(block->space, block->offset) == 0));
#endif #endif
buf_pool->n_page_gets++;
return(TRUE); return(TRUE);
} }
...@@ -1732,7 +1933,7 @@ buf_print(void) ...@@ -1732,7 +1933,7 @@ buf_print(void)
ut_ad(buf_pool); ut_ad(buf_pool);
size = buf_pool_get_curr_size() / UNIV_PAGE_SIZE; size = buf_pool->curr_size;
index_ids = mem_alloc(sizeof(dulint) * size); index_ids = mem_alloc(sizeof(dulint) * size);
counts = mem_alloc(sizeof(ulint) * size); counts = mem_alloc(sizeof(ulint) * size);
...@@ -1847,7 +2048,7 @@ buf_print_io( ...@@ -1847,7 +2048,7 @@ buf_print_io(
return; return;
} }
size = buf_pool_get_curr_size() / UNIV_PAGE_SIZE; size = buf_pool->curr_size;
mutex_enter(&(buf_pool->mutex)); mutex_enter(&(buf_pool->mutex));
...@@ -1866,6 +2067,15 @@ buf_print_io( ...@@ -1866,6 +2067,15 @@ buf_print_io(
buf += sprintf(buf, buf += sprintf(buf,
"Modified db pages %lu\n", "Modified db pages %lu\n",
UT_LIST_GET_LEN(buf_pool->flush_list)); UT_LIST_GET_LEN(buf_pool->flush_list));
if (srv_use_awe) {
buf += sprintf(buf,
"AWE: Buffer pool memory frames %lu\n",
buf_pool->n_frames);
buf += sprintf(buf,
"AWE: Database pages and free buffers mapped in frames %lu\n",
UT_LIST_GET_LEN(buf_pool->awe_LRU_free_mapped));
}
buf += sprintf(buf, "Pending reads %lu \n", buf_pool->n_pend_reads); buf += sprintf(buf, "Pending reads %lu \n", buf_pool->n_pend_reads);
...@@ -1891,6 +2101,13 @@ buf_print_io( ...@@ -1891,6 +2101,13 @@ buf_print_io(
(buf_pool->n_pages_written - buf_pool->n_pages_written_old) (buf_pool->n_pages_written - buf_pool->n_pages_written_old)
/ time_elapsed); / time_elapsed);
if (srv_use_awe) {
buf += sprintf(buf, "AWE: %.2f page remaps/s\n",
(buf_pool->n_pages_awe_remapped
- buf_pool->n_pages_awe_remapped_old)
/ time_elapsed);
}
if (buf_pool->n_page_gets > buf_pool->n_page_gets_old) { if (buf_pool->n_page_gets > buf_pool->n_page_gets_old) {
buf += sprintf(buf, "Buffer pool hit rate %lu / 1000\n", buf += sprintf(buf, "Buffer pool hit rate %lu / 1000\n",
1000 1000
...@@ -1906,6 +2123,7 @@ buf_print_io( ...@@ -1906,6 +2123,7 @@ buf_print_io(
buf_pool->n_pages_read_old = buf_pool->n_pages_read; buf_pool->n_pages_read_old = buf_pool->n_pages_read;
buf_pool->n_pages_created_old = buf_pool->n_pages_created; buf_pool->n_pages_created_old = buf_pool->n_pages_created;
buf_pool->n_pages_written_old = buf_pool->n_pages_written; buf_pool->n_pages_written_old = buf_pool->n_pages_written;
buf_pool->n_pages_awe_remapped_old = buf_pool->n_pages_awe_remapped;
mutex_exit(&(buf_pool->mutex)); mutex_exit(&(buf_pool->mutex));
} }
...@@ -1922,6 +2140,7 @@ buf_refresh_io_stats(void) ...@@ -1922,6 +2140,7 @@ buf_refresh_io_stats(void)
buf_pool->n_pages_read_old = buf_pool->n_pages_read; buf_pool->n_pages_read_old = buf_pool->n_pages_read;
buf_pool->n_pages_created_old = buf_pool->n_pages_created; buf_pool->n_pages_created_old = buf_pool->n_pages_created;
buf_pool->n_pages_written_old = buf_pool->n_pages_written; buf_pool->n_pages_written_old = buf_pool->n_pages_written;
buf_pool->n_pages_awe_remapped_old = buf_pool->n_pages_awe_remapped;
} }
/************************************************************************* /*************************************************************************
......
...@@ -24,6 +24,7 @@ Created 11/11/1995 Heikki Tuuri ...@@ -24,6 +24,7 @@ Created 11/11/1995 Heikki Tuuri
#include "log0log.h" #include "log0log.h"
#include "os0file.h" #include "os0file.h"
#include "trx0sys.h" #include "trx0sys.h"
#include "srv0srv.h"
/* When flushed, dirty blocks are searched in neighborhoods of this size, and /* When flushed, dirty blocks are searched in neighborhoods of this size, and
flushed along with the original page. */ flushed along with the original page. */
...@@ -103,7 +104,7 @@ buf_flush_ready_for_replace( ...@@ -103,7 +104,7 @@ buf_flush_ready_for_replace(
/*========================*/ /*========================*/
/* out: TRUE if can replace immediately */ /* out: TRUE if can replace immediately */
buf_block_t* block) /* in: buffer control block, must be in state buf_block_t* block) /* in: buffer control block, must be in state
BUF_BLOCK_FILE_PAGE and in the LRU list*/ BUF_BLOCK_FILE_PAGE and in the LRU list */
{ {
ut_ad(mutex_own(&(buf_pool->mutex))); ut_ad(mutex_own(&(buf_pool->mutex)));
ut_ad(block->state == BUF_BLOCK_FILE_PAGE); ut_ad(block->state == BUF_BLOCK_FILE_PAGE);
...@@ -134,7 +135,6 @@ buf_flush_ready_for_flush( ...@@ -134,7 +135,6 @@ buf_flush_ready_for_flush(
if ((ut_dulint_cmp(block->oldest_modification, ut_dulint_zero) > 0) if ((ut_dulint_cmp(block->oldest_modification, ut_dulint_zero) > 0)
&& (block->io_fix == 0)) { && (block->io_fix == 0)) {
if (flush_type != BUF_FLUSH_LRU) { if (flush_type != BUF_FLUSH_LRU) {
return(TRUE); return(TRUE);
...@@ -436,6 +436,20 @@ buf_flush_try_page( ...@@ -436,6 +436,20 @@ buf_flush_try_page(
&& block && buf_flush_ready_for_flush(block, flush_type)) { && block && buf_flush_ready_for_flush(block, flush_type)) {
block->io_fix = BUF_IO_WRITE; block->io_fix = BUF_IO_WRITE;
/* If AWE is enabled and the page is not mapped to a frame,
then map it */
if (block->frame == NULL) {
ut_a(srv_use_awe);
/* We set second parameter TRUE because the block is
in the LRU list and we must put it to
awe_LRU_free_mapped list once mapped to a frame */
buf_awe_map_page_to_frame(block, TRUE);
}
block->flush_type = flush_type; block->flush_type = flush_type;
if (buf_pool->n_flush[flush_type] == 0) { if (buf_pool->n_flush[flush_type] == 0) {
...@@ -486,6 +500,20 @@ buf_flush_try_page( ...@@ -486,6 +500,20 @@ buf_flush_try_page(
..._ready_for_flush). */ ..._ready_for_flush). */
block->io_fix = BUF_IO_WRITE; block->io_fix = BUF_IO_WRITE;
/* If AWE is enabled and the page is not mapped to a frame,
then map it */
if (block->frame == NULL) {
ut_a(srv_use_awe);
/* We set second parameter TRUE because the block is
in the LRU list and we must put it to
awe_LRU_free_mapped list once mapped to a frame */
buf_awe_map_page_to_frame(block, TRUE);
}
block->flush_type = flush_type; block->flush_type = flush_type;
if (buf_pool->n_flush[flush_type] == 0) { if (buf_pool->n_flush[flush_type] == 0) {
...@@ -511,6 +539,20 @@ buf_flush_try_page( ...@@ -511,6 +539,20 @@ buf_flush_try_page(
&& buf_flush_ready_for_flush(block, flush_type)) { && buf_flush_ready_for_flush(block, flush_type)) {
block->io_fix = BUF_IO_WRITE; block->io_fix = BUF_IO_WRITE;
/* If AWE is enabled and the page is not mapped to a frame,
then map it */
if (block->frame == NULL) {
ut_a(srv_use_awe);
/* We set second parameter TRUE because the block is
in the LRU list and we must put it to
awe_LRU_free_mapped list once mapped to a frame */
buf_awe_map_page_to_frame(block, TRUE);
}
block->flush_type = flush_type; block->flush_type = flush_type;
if (buf_pool->n_flush[block->flush_type] == 0) { if (buf_pool->n_flush[block->flush_type] == 0) {
......
...@@ -132,7 +132,13 @@ buf_LRU_search_and_free_block( ...@@ -132,7 +132,13 @@ buf_LRU_search_and_free_block(
mutex_exit(&(buf_pool->mutex)); mutex_exit(&(buf_pool->mutex));
/* Remove possible adaptive hash index built on the
page; in the case of AWE the block may not have a
frame at all */
if (block->frame) {
btr_search_drop_page_hash_index(block->frame); btr_search_drop_page_hash_index(block->frame);
}
mutex_enter(&(buf_pool->mutex)); mutex_enter(&(buf_pool->mutex));
...@@ -196,7 +202,9 @@ list. */ ...@@ -196,7 +202,9 @@ list. */
buf_block_t* buf_block_t*
buf_LRU_get_free_block(void) buf_LRU_get_free_block(void)
/*========================*/ /*========================*/
/* out: the free control block */ /* out: the free control block; also if AWE is
used, it is guaranteed that the block has its
page mapped to a frame when we return */
{ {
buf_block_t* block = NULL; buf_block_t* block = NULL;
ibool freed; ibool freed;
...@@ -257,6 +265,22 @@ loop: ...@@ -257,6 +265,22 @@ loop:
block = UT_LIST_GET_FIRST(buf_pool->free); block = UT_LIST_GET_FIRST(buf_pool->free);
UT_LIST_REMOVE(free, buf_pool->free, block); UT_LIST_REMOVE(free, buf_pool->free, block);
if (srv_use_awe) {
if (block->frame) {
/* Remove from the list of mapped pages */
UT_LIST_REMOVE(awe_LRU_free_mapped,
buf_pool->awe_LRU_free_mapped, block);
} else {
/* We map the page to a frame; second param
FALSE below because we do not want it to be
added to the awe_LRU_free_mapped list */
buf_awe_map_page_to_frame(block, FALSE);
}
}
block->state = BUF_BLOCK_READY_FOR_USE; block->state = BUF_BLOCK_READY_FOR_USE;
mutex_exit(&(buf_pool->mutex)); mutex_exit(&(buf_pool->mutex));
...@@ -429,6 +453,13 @@ buf_LRU_remove_block( ...@@ -429,6 +453,13 @@ buf_LRU_remove_block(
/* Remove the block from the LRU list */ /* Remove the block from the LRU list */
UT_LIST_REMOVE(LRU, buf_pool->LRU, block); UT_LIST_REMOVE(LRU, buf_pool->LRU, block);
if (srv_use_awe && block->frame) {
/* Remove from the list of mapped pages */
UT_LIST_REMOVE(awe_LRU_free_mapped,
buf_pool->awe_LRU_free_mapped, block);
}
/* If the LRU list is so short that LRU_old not defined, return */ /* If the LRU list is so short that LRU_old not defined, return */
if (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN) { if (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN) {
...@@ -475,6 +506,13 @@ buf_LRU_add_block_to_end_low( ...@@ -475,6 +506,13 @@ buf_LRU_add_block_to_end_low(
UT_LIST_ADD_LAST(LRU, buf_pool->LRU, block); UT_LIST_ADD_LAST(LRU, buf_pool->LRU, block);
if (srv_use_awe && block->frame) {
/* Add to the list of mapped pages */
UT_LIST_ADD_LAST(awe_LRU_free_mapped,
buf_pool->awe_LRU_free_mapped, block);
}
if (UT_LIST_GET_LEN(buf_pool->LRU) >= BUF_LRU_OLD_MIN_LEN) { if (UT_LIST_GET_LEN(buf_pool->LRU) >= BUF_LRU_OLD_MIN_LEN) {
buf_pool->LRU_old_len++; buf_pool->LRU_old_len++;
...@@ -518,6 +556,15 @@ buf_LRU_add_block_low( ...@@ -518,6 +556,15 @@ buf_LRU_add_block_low(
block->old = old; block->old = old;
cl = buf_pool_clock_tic(); cl = buf_pool_clock_tic();
if (srv_use_awe && block->frame) {
/* Add to the list of mapped pages; for simplicity we always
add to the start, even if the user would have set 'old'
TRUE */
UT_LIST_ADD_FIRST(awe_LRU_free_mapped,
buf_pool->awe_LRU_free_mapped, block);
}
if (!old || (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN)) { if (!old || (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN)) {
UT_LIST_ADD_FIRST(LRU, buf_pool->LRU, block); UT_LIST_ADD_FIRST(LRU, buf_pool->LRU, block);
...@@ -613,6 +660,13 @@ buf_LRU_block_free_non_file_page( ...@@ -613,6 +660,13 @@ buf_LRU_block_free_non_file_page(
memset(block->frame, '\0', UNIV_PAGE_SIZE); memset(block->frame, '\0', UNIV_PAGE_SIZE);
#endif #endif
UT_LIST_ADD_FIRST(free, buf_pool->free, block); UT_LIST_ADD_FIRST(free, buf_pool->free, block);
if (srv_use_awe && block->frame) {
/* Add to the list of mapped pages */
UT_LIST_ADD_FIRST(awe_LRU_free_mapped,
buf_pool->awe_LRU_free_mapped, block);
}
} }
/********************************************************************** /**********************************************************************
...@@ -639,7 +693,9 @@ buf_LRU_block_remove_hashed_page( ...@@ -639,7 +693,9 @@ buf_LRU_block_remove_hashed_page(
buf_pool->freed_page_clock += 1; buf_pool->freed_page_clock += 1;
buf_frame_modify_clock_inc(block->frame); /* Note that if AWE is enabled the block may not have a frame at all */
buf_block_modify_clock_inc(block);
HASH_DELETE(buf_block_t, hash, buf_pool->page_hash, HASH_DELETE(buf_block_t, hash, buf_pool->page_hash,
buf_page_address_fold(block->space, block->offset), buf_page_address_fold(block->space, block->offset),
......
...@@ -576,7 +576,7 @@ buf_read_recv_pages( ...@@ -576,7 +576,7 @@ buf_read_recv_pages(
os_aio_print_debug = FALSE; os_aio_print_debug = FALSE;
while (buf_pool->n_pend_reads >= RECV_POOL_N_FREE_BLOCKS / 2) { while (buf_pool->n_pend_reads >= recv_n_pool_free_frames / 2) {
os_aio_simulated_wake_handler_threads(); os_aio_simulated_wake_handler_threads();
os_thread_sleep(500000); os_thread_sleep(500000);
......
...@@ -466,6 +466,9 @@ struct btr_pcur_struct{ ...@@ -466,6 +466,9 @@ struct btr_pcur_struct{
BTR_PCUR_AFTER, depending on whether BTR_PCUR_AFTER, depending on whether
cursor was on, before, or after the cursor was on, before, or after the
old_rec record */ old_rec record */
buf_block_t* block_when_stored;/* buffer block when the position was
stored; note that if AWE is on, frames
may move */
dulint modify_clock; /* the modify clock value of the dulint modify_clock; /* the modify clock value of the
buffer block when the cursor position buffer block when the cursor position
was stored */ was stored */
......
...@@ -30,6 +30,7 @@ Created 11/5/1995 Heikki Tuuri ...@@ -30,6 +30,7 @@ Created 11/5/1995 Heikki Tuuri
#include "sync0rw.h" #include "sync0rw.h"
#include "hash0hash.h" #include "hash0hash.h"
#include "ut0byte.h" #include "ut0byte.h"
#include "os0proc.h"
/* Flags for flush types */ /* Flags for flush types */
#define BUF_FLUSH_LRU 1 #define BUF_FLUSH_LRU 1
...@@ -58,23 +59,34 @@ extern ibool buf_debug_prints;/* If this is set TRUE, the program ...@@ -58,23 +59,34 @@ extern ibool buf_debug_prints;/* If this is set TRUE, the program
occurs */ occurs */
/************************************************************************ /************************************************************************
Initializes the buffer pool of the database. */ Creates the buffer pool. */
void buf_pool_t*
buf_pool_init( buf_pool_init(
/*==========*/ /*==========*/
ulint max_size, /* in: maximum size of the pool in blocks */ /* out, own: buf_pool object, NULL if not
ulint curr_size); /* in: current size to use, must be <= enough memory or error */
ulint max_size, /* in: maximum size of the buf_pool in
blocks */
ulint curr_size, /* in: current size to use, must be <=
max_size, currently must be equal to
max_size */ max_size */
ulint n_frames); /* in: number of frames; if AWE is used,
this is the size of the address space window
where physical memory pages are mapped; if
AWE is not used then this must be the same
as max_size */
/************************************************************************* /*************************************************************************
Gets the current size of buffer pool in bytes. */ Gets the current size of buffer buf_pool in bytes. In the case of AWE, the
size of AWE window (= the frames). */
UNIV_INLINE UNIV_INLINE
ulint ulint
buf_pool_get_curr_size(void); buf_pool_get_curr_size(void);
/*========================*/ /*========================*/
/* out: size in bytes */ /* out: size in bytes */
/************************************************************************* /*************************************************************************
Gets the maximum size of buffer pool in bytes. */ Gets the maximum size of buffer pool in bytes. In the case of AWE, the
size of AWE window (= the frames). */
UNIV_INLINE UNIV_INLINE
ulint ulint
buf_pool_get_max_size(void); buf_pool_get_max_size(void);
...@@ -138,8 +150,8 @@ improve debugging. Only values RW_S_LATCH and RW_X_LATCH are allowed as LA! */ ...@@ -138,8 +150,8 @@ improve debugging. Only values RW_S_LATCH and RW_X_LATCH are allowed as LA! */
NOTE! The following macros should be used instead of NOTE! The following macros should be used instead of
buf_page_optimistic_get_func, to improve debugging. Only values RW_S_LATCH and buf_page_optimistic_get_func, to improve debugging. Only values RW_S_LATCH and
RW_X_LATCH are allowed as LA! */ RW_X_LATCH are allowed as LA! */
#define buf_page_optimistic_get(LA, G, MC, MTR) buf_page_optimistic_get_func(\ #define buf_page_optimistic_get(LA, BL, G, MC, MTR) buf_page_optimistic_get_func(\
LA, G, MC, IB__FILE__, __LINE__, MTR) LA, BL, G, MC, IB__FILE__, __LINE__, MTR)
/************************************************************************ /************************************************************************
This is the general function used to get optimistic access to a database This is the general function used to get optimistic access to a database
page. */ page. */
...@@ -149,7 +161,9 @@ buf_page_optimistic_get_func( ...@@ -149,7 +161,9 @@ buf_page_optimistic_get_func(
/*=========================*/ /*=========================*/
/* out: TRUE if success */ /* out: TRUE if success */
ulint rw_latch,/* in: RW_S_LATCH, RW_X_LATCH */ ulint rw_latch,/* in: RW_S_LATCH, RW_X_LATCH */
buf_frame_t* guess, /* in: guessed frame */ buf_block_t* block, /* in: guessed block */
buf_frame_t* guess, /* in: guessed frame; note that AWE may move
frames */
dulint modify_clock,/* in: modify clock value if mode is dulint modify_clock,/* in: modify clock value if mode is
..._GUESS_ON_CLOCK */ ..._GUESS_ON_CLOCK */
char* file, /* in: file name */ char* file, /* in: file name */
...@@ -350,6 +364,16 @@ buf_frame_modify_clock_inc( ...@@ -350,6 +364,16 @@ buf_frame_modify_clock_inc(
/* out: new value */ /* out: new value */
buf_frame_t* frame); /* in: pointer to a frame */ buf_frame_t* frame); /* in: pointer to a frame */
/************************************************************************ /************************************************************************
Increments the modify clock of a block by 1. The caller must (1) own the
buf_pool mutex and block bufferfix count has to be zero, (2) or own an x-lock
on the block. */
UNIV_INLINE
dulint
buf_block_modify_clock_inc(
/*=======================*/
/* out: new value */
buf_block_t* block); /* in: block */
/************************************************************************
Returns the value of the modify clock. The caller must have an s-lock Returns the value of the modify clock. The caller must have an s-lock
or x-lock on the block. */ or x-lock on the block. */
UNIV_INLINE UNIV_INLINE
...@@ -428,7 +452,7 @@ UNIV_INLINE ...@@ -428,7 +452,7 @@ UNIV_INLINE
buf_frame_t* buf_frame_t*
buf_frame_align( buf_frame_align(
/*============*/ /*============*/
/* out: pointer to block */ /* out: pointer to frame */
byte* ptr); /* in: pointer to a frame */ byte* ptr); /* in: pointer to a frame */
/*********************************************************************** /***********************************************************************
Checks if a pointer points to the block array of the buffer pool (blocks, not Checks if a pointer points to the block array of the buffer pool (blocks, not
...@@ -505,6 +529,19 @@ buf_pool_invalidate(void); ...@@ -505,6 +529,19 @@ buf_pool_invalidate(void);
--------------------------- LOWER LEVEL ROUTINES ------------------------- --------------------------- LOWER LEVEL ROUTINES -------------------------
=========================================================================*/ =========================================================================*/
/************************************************************************
Maps the page of block to a frame, if not mapped yet. Unmaps some page
from the end of the awe_LRU_free_mapped list. */
void
buf_awe_map_page_to_frame(
/*======================*/
buf_block_t* block, /* in: block whose page should be
mapped to a frame */
ibool add_to_mapped_list);/* in: TRUE if, in the case where
the page has to be mapped, the block
should also be added to the
awe_LRU_free_mapped list */
/************************************************************************* /*************************************************************************
Adds latch level info for the rw-lock protecting the buffer frame. This Adds latch level info for the rw-lock protecting the buffer frame. This
should be called in the debug version after a successful latching of a should be called in the debug version after a successful latching of a
...@@ -638,7 +675,16 @@ struct buf_block_struct{ ...@@ -638,7 +675,16 @@ struct buf_block_struct{
byte* frame; /* pointer to buffer frame which byte* frame; /* pointer to buffer frame which
is of size UNIV_PAGE_SIZE, and is of size UNIV_PAGE_SIZE, and
aligned to an address divisible by aligned to an address divisible by
UNIV_PAGE_SIZE */ UNIV_PAGE_SIZE; if AWE is used, this
will be NULL for the pages which are
currently not mapped into the virtual
address space window of the buffer
pool */
os_awe_t* awe_info; /* if AWE is used, then an array of
awe page infos for
UNIV_PAGE_SIZE / OS_AWE_X86_PAGE_SIZE
(normally = 4) physical memory
pages; otherwise NULL */
ulint space; /* space id of the page */ ulint space; /* space id of the page */
ulint offset; /* page number within the space */ ulint offset; /* page number within the space */
ulint lock_hash_val; /* hashed value of the page address ulint lock_hash_val; /* hashed value of the page address
...@@ -691,6 +737,10 @@ struct buf_block_struct{ ...@@ -691,6 +737,10 @@ struct buf_block_struct{
/* node of the free block list */ /* node of the free block list */
UT_LIST_NODE_T(buf_block_t) LRU; UT_LIST_NODE_T(buf_block_t) LRU;
/* node of the LRU list */ /* node of the LRU list */
UT_LIST_NODE_T(buf_block_t) awe_LRU_free_mapped;
/* in the AWE version node in the
list of free and LRU blocks which are
mapped to a frame */
ulint LRU_position; /* value which monotonically ulint LRU_position; /* value which monotonically
decreases (or may stay constant if decreases (or may stay constant if
the block is in the old blocks) toward the block is in the old blocks) toward
...@@ -758,11 +808,12 @@ struct buf_block_struct{ ...@@ -758,11 +808,12 @@ struct buf_block_struct{
BTR_SEARCH_RIGHT_SIDE in hash BTR_SEARCH_RIGHT_SIDE in hash
indexing */ indexing */
/* 6. Debug fields */ /* 6. Debug fields */
#ifdef UNIV_SYNC_DEBUG
rw_lock_t debug_latch; /* in the debug version, each thread rw_lock_t debug_latch; /* in the debug version, each thread
which bufferfixes the block acquires which bufferfixes the block acquires
an s-latch here; so we can use the an s-latch here; so we can use the
debug utilities in sync0rw */ debug utilities in sync0rw */
#endif
ibool file_page_was_freed; ibool file_page_was_freed;
/* this is set to TRUE when fsp /* this is set to TRUE when fsp
frees a page in buffer pool */ frees a page in buffer pool */
...@@ -781,16 +832,36 @@ struct buf_pool_struct{ ...@@ -781,16 +832,36 @@ struct buf_pool_struct{
struct and control blocks, except the struct and control blocks, except the
read-write lock in them */ read-write lock in them */
byte* frame_mem; /* pointer to the memory area which byte* frame_mem; /* pointer to the memory area which
was allocated for the frames */ was allocated for the frames; in AWE
this is the virtual address space
window where we map pages stored
in physical memory */
byte* frame_zero; /* pointer to the first buffer frame: byte* frame_zero; /* pointer to the first buffer frame:
this may differ from frame_mem, because this may differ from frame_mem, because
this is aligned by the frame size */ this is aligned by the frame size */
byte* high_end; /* pointer to the end of the byte* high_end; /* pointer to the end of the buffer
buffer pool */ frames */
ulint n_frames; /* number of frames */
buf_block_t* blocks; /* array of buffer control blocks */ buf_block_t* blocks; /* array of buffer control blocks */
buf_block_t** blocks_of_frames;/* inverse mapping which can be used
to retrieve the buffer control block
of a frame; this is an array which
lists the blocks of frames in the
order frame_zero,
frame_zero + UNIV_PAGE_SIZE, ...
a control block is always assigned
for each frame, even if the frame does
not contain any data; note that in AWE
there are more control blocks than
buffer frames */
os_awe_t* awe_info; /* if AWE is used, AWE info for the
physical 4 kB memory pages associated
with buffer frames */
ulint max_size; /* number of control blocks == ulint max_size; /* number of control blocks ==
maximum pool size in pages */ maximum pool size in pages */
ulint curr_size; /* current pool size in pages */ ulint curr_size; /* current pool size in pages;
currently always the same as
max_size */
hash_table_t* page_hash; /* hash table of the file pages */ hash_table_t* page_hash; /* hash table of the file pages */
ulint n_pend_reads; /* number of pending read operations */ ulint n_pend_reads; /* number of pending read operations */
...@@ -802,11 +873,14 @@ struct buf_pool_struct{ ...@@ -802,11 +873,14 @@ struct buf_pool_struct{
ulint n_pages_created;/* number of pages created in the pool ulint n_pages_created;/* number of pages created in the pool
with no read */ with no read */
ulint n_page_gets; /* number of page gets performed; ulint n_page_gets; /* number of page gets performed;
also successful seraches through also successful searches through
the adaptive hash index are the adaptive hash index are
counted as page gets; this field counted as page gets; this field
is NOT protected by the buffer is NOT protected by the buffer
pool mutex */ pool mutex */
ulint n_pages_awe_remapped; /* if AWE is enabled, the
number of remaps of blocks to
buffer frames */
ulint n_page_gets_old;/* n_page_gets when buf_print was ulint n_page_gets_old;/* n_page_gets when buf_print was
last time called: used to calculate last time called: used to calculate
hit rate */ hit rate */
...@@ -815,6 +889,7 @@ struct buf_pool_struct{ ...@@ -815,6 +889,7 @@ struct buf_pool_struct{
ulint n_pages_written_old;/* number write operations */ ulint n_pages_written_old;/* number write operations */
ulint n_pages_created_old;/* number of pages created in ulint n_pages_created_old;/* number of pages created in
the pool with no read */ the pool with no read */
ulint n_pages_awe_remapped_old;
/* 2. Page flushing algorithm fields */ /* 2. Page flushing algorithm fields */
UT_LIST_BASE_NODE_T(buf_block_t) flush_list; UT_LIST_BASE_NODE_T(buf_block_t) flush_list;
...@@ -847,7 +922,10 @@ struct buf_pool_struct{ ...@@ -847,7 +922,10 @@ struct buf_pool_struct{
/* 3. LRU replacement algorithm fields */ /* 3. LRU replacement algorithm fields */
UT_LIST_BASE_NODE_T(buf_block_t) free; UT_LIST_BASE_NODE_T(buf_block_t) free;
/* base node of the free block list */ /* base node of the free block list;
in the case of AWE, at the start are
always free blocks for which the
physical memory is mapped to a frame */
UT_LIST_BASE_NODE_T(buf_block_t) LRU; UT_LIST_BASE_NODE_T(buf_block_t) LRU;
/* base node of the LRU list */ /* base node of the LRU list */
buf_block_t* LRU_old; /* pointer to the about 3/8 oldest buf_block_t* LRU_old; /* pointer to the about 3/8 oldest
...@@ -859,6 +937,12 @@ struct buf_pool_struct{ ...@@ -859,6 +937,12 @@ struct buf_pool_struct{
see buf0lru.c for the restrictions see buf0lru.c for the restrictions
on this value; not defined if on this value; not defined if
LRU_old == NULL */ LRU_old == NULL */
UT_LIST_BASE_NODE_T(buf_block_t) awe_LRU_free_mapped;
/* list of those blocks which are
in the LRU list or the free list, and
where the page is mapped to a frame;
thus, frames allocated, e.g., to the
lock table, are not in this list */
}; };
/* States of a control block */ /* States of a control block */
......
...@@ -36,25 +36,27 @@ buf_block_peek_if_too_old( ...@@ -36,25 +36,27 @@ buf_block_peek_if_too_old(
} }
/************************************************************************* /*************************************************************************
Gets the current size of buffer buf_pool in bytes. */ Gets the current size of buffer buf_pool in bytes. In the case of AWE, the
size of AWE window (= the frames). */
UNIV_INLINE UNIV_INLINE
ulint ulint
buf_pool_get_curr_size(void) buf_pool_get_curr_size(void)
/*========================*/ /*========================*/
/* out: size in bytes */ /* out: size in bytes */
{ {
return((buf_pool->curr_size) * UNIV_PAGE_SIZE); return((buf_pool->n_frames) * UNIV_PAGE_SIZE);
} }
/************************************************************************* /*************************************************************************
Gets the maximum size of buffer buf_pool in bytes. */ Gets the maximum size of buffer buf_pool in bytes. In the case of AWE, the
size of AWE window (= the frames). */
UNIV_INLINE UNIV_INLINE
ulint ulint
buf_pool_get_max_size(void) buf_pool_get_max_size(void)
/*=======================*/ /*=======================*/
/* out: size in bytes */ /* out: size in bytes */
{ {
return((buf_pool->max_size) * UNIV_PAGE_SIZE); return((buf_pool->n_frames) * UNIV_PAGE_SIZE);
} }
/*********************************************************************** /***********************************************************************
...@@ -207,54 +209,24 @@ buf_block_align( ...@@ -207,54 +209,24 @@ buf_block_align(
frame_zero = buf_pool->frame_zero; frame_zero = buf_pool->frame_zero;
ut_ad((ulint)ptr >= (ulint)frame_zero); if ((ulint)ptr < (ulint)frame_zero
|| (ulint)ptr > (ulint)(buf_pool->high_end)) {
block = buf_pool_get_nth_block(buf_pool, ((ulint)(ptr - frame_zero))
>> UNIV_PAGE_SIZE_SHIFT);
if (block < buf_pool->blocks
|| block >= buf_pool->blocks + buf_pool->max_size) {
fprintf(stderr,
"InnoDB: Error: trying to access a stray pointer %lx\n"
"InnoDB: buf pool start is at %lx, number of pages %lu\n", (ulint)ptr,
(ulint)frame_zero, buf_pool->max_size);
ut_a(0);
}
return(block);
}
/***********************************************************************
Gets the block to whose frame the pointer is pointing to. Does not
require a file page to be bufferfixed. */
UNIV_INLINE
buf_block_t*
buf_block_align_low(
/*================*/
/* out: pointer to block */
byte* ptr) /* in: pointer to a frame */
{
buf_block_t* block;
buf_frame_t* frame_zero;
ut_ad(ptr);
frame_zero = buf_pool->frame_zero;
ut_ad((ulint)ptr >= (ulint)frame_zero);
block = buf_pool_get_nth_block(buf_pool, ((ulint)(ptr - frame_zero))
>> UNIV_PAGE_SIZE_SHIFT);
if (block < buf_pool->blocks
|| block >= buf_pool->blocks + buf_pool->max_size) {
ut_print_timestamp(stderr);
fprintf(stderr, fprintf(stderr,
"InnoDB: Error: trying to access a stray pointer %lx\n" " InnoDB: Error: trying to access a stray pointer %lx\n"
"InnoDB: buf pool start is at %lx, number of pages %lu\n", (ulint)ptr, "InnoDB: buf pool start is at %lx, end at %lx\n"
(ulint)frame_zero, buf_pool->max_size); "InnoDB: Probable reason is database corruption or memory\n"
"InnoDB: corruption. If this happens in an InnoDB database recovery,\n"
"InnoDB: you can look from section 6.1 at http://www.innodb.com/ibman.html\n"
"InnoDB: how to force recovery.\n",
(ulint)ptr, (ulint)frame_zero,
(ulint)(buf_pool->high_end));
ut_a(0); ut_a(0);
} }
block = *(buf_pool->blocks_of_frames + (((ulint)(ptr - frame_zero))
>> UNIV_PAGE_SIZE_SHIFT));
return(block); return(block);
} }
...@@ -264,7 +236,7 @@ UNIV_INLINE ...@@ -264,7 +236,7 @@ UNIV_INLINE
buf_frame_t* buf_frame_t*
buf_frame_align( buf_frame_align(
/*============*/ /*============*/
/* out: pointer to block */ /* out: pointer to frame */
byte* ptr) /* in: pointer to a frame */ byte* ptr) /* in: pointer to a frame */
{ {
buf_frame_t* frame; buf_frame_t* frame;
...@@ -273,14 +245,19 @@ buf_frame_align( ...@@ -273,14 +245,19 @@ buf_frame_align(
frame = ut_align_down(ptr, UNIV_PAGE_SIZE); frame = ut_align_down(ptr, UNIV_PAGE_SIZE);
if (((ulint)frame if (((ulint)frame < (ulint)(buf_pool->frame_zero))
< (ulint)(buf_pool->frame_zero)) || (ulint)frame >= (ulint)(buf_pool->high_end)) {
|| ((ulint)frame > (ulint)(buf_pool_get_nth_block(buf_pool,
buf_pool->max_size - 1)->frame))) { ut_print_timestamp(stderr);
fprintf(stderr, fprintf(stderr,
"InnoDB: Error: trying to access a stray pointer %lx\n" " InnoDB: Error: trying to access a stray pointer %lx\n"
"InnoDB: buf pool start is at %lx, number of pages %lu\n", (ulint)ptr, "InnoDB: buf pool start is at %lx, end at %lx\n"
(ulint)(buf_pool->frame_zero), buf_pool->max_size); "InnoDB: Probable reason is database corruption or memory\n"
"InnoDB: corruption. If this happens in an InnoDB database recovery,\n"
"InnoDB: you can look from section 6.1 at http://www.innodb.com/ibman.html\n"
"InnoDB: how to force recovery.\n",
(ulint)ptr, (ulint)(buf_pool->frame_zero),
(ulint)(buf_pool->high_end));
ut_a(0); ut_a(0);
} }
...@@ -469,7 +446,7 @@ buf_frame_modify_clock_inc( ...@@ -469,7 +446,7 @@ buf_frame_modify_clock_inc(
ut_ad(frame); ut_ad(frame);
block = buf_block_align_low(frame); block = buf_block_align(frame);
ut_ad((mutex_own(&(buf_pool->mutex)) && (block->buf_fix_count == 0)) ut_ad((mutex_own(&(buf_pool->mutex)) && (block->buf_fix_count == 0))
|| rw_lock_own(&(block->lock), RW_LOCK_EXCLUSIVE)); || rw_lock_own(&(block->lock), RW_LOCK_EXCLUSIVE));
...@@ -479,6 +456,25 @@ buf_frame_modify_clock_inc( ...@@ -479,6 +456,25 @@ buf_frame_modify_clock_inc(
return(block->modify_clock); return(block->modify_clock);
} }
/************************************************************************
Advances the modify clock of a buffer control block by one. The caller
must either (1) own the buf_pool mutex while the bufferfix count of the
block is zero, or (2) own an x-lock on the block. */
UNIV_INLINE
dulint
buf_block_modify_clock_inc(
/*=======================*/
				/* out: the modify clock value after
				the increment */
	buf_block_t*	block)	/* in: block whose modify clock is
				incremented */
{
	dulint	new_clock;

	/* Debug check of the locking precondition stated above */
	ut_ad((mutex_own(&(buf_pool->mutex)) && (block->buf_fix_count == 0))
	      || rw_lock_own(&(block->lock), RW_LOCK_EXCLUSIVE));

	UT_DULINT_INC(block->modify_clock);

	new_clock = block->modify_clock;

	return(new_clock);
}
/************************************************************************ /************************************************************************
Returns the value of the modify clock. The caller must have an s-lock Returns the value of the modify clock. The caller must have an s-lock
or x-lock on the block. */ or x-lock on the block. */
...@@ -508,15 +504,16 @@ void ...@@ -508,15 +504,16 @@ void
buf_block_buf_fix_inc_debug( buf_block_buf_fix_inc_debug(
/*========================*/ /*========================*/
buf_block_t* block, /* in: block to bufferfix */ buf_block_t* block, /* in: block to bufferfix */
char* file, /* in: file name */ char* file __attribute__ ((unused)), /* in: file name */
ulint line) /* in: line */ ulint line __attribute__ ((unused))) /* in: line */
{ {
#ifdef UNIV_SYNC_DEBUG
ibool ret; ibool ret;
ret = rw_lock_s_lock_func_nowait(&(block->debug_latch), file, line); ret = rw_lock_s_lock_func_nowait(&(block->debug_latch), file, line);
ut_ad(ret == TRUE); ut_ad(ret == TRUE);
#endif
block->buf_fix_count++; block->buf_fix_count++;
} }
......
...@@ -53,7 +53,9 @@ LRU list to the free list. */ ...@@ -53,7 +53,9 @@ LRU list to the free list. */
buf_block_t* buf_block_t*
buf_LRU_get_free_block(void); buf_LRU_get_free_block(void);
/*=========================*/ /*=========================*/
/* out: the free control block */ /* out: the free control block; also if AWE is
used, it is guaranteed that the block has its
page mapped to a frame when we return */
/********************************************************************** /**********************************************************************
Puts a block back to the free list. */ Puts a block back to the free list. */
......
...@@ -355,12 +355,7 @@ in the debug version: spaces with an odd number as the id are replicate ...@@ -355,12 +355,7 @@ in the debug version: spaces with an odd number as the id are replicate
spaces */ spaces */
#define RECV_REPLICA_SPACE_ADD 1 #define RECV_REPLICA_SPACE_ADD 1
/* This many blocks must be left free in the buffer pool when we scan extern ulint recv_n_pool_free_frames;
the log and store the scanned log records in the buffer pool: we will
use these free blocks to read in pages when we start applying the
log records to the database. */
#define RECV_POOL_N_FREE_BLOCKS (ut_min(256, buf_pool_get_curr_size() / 8))
#ifndef UNIV_NONINL #ifndef UNIV_NONINL
#include "log0recv.ic" #include "log0recv.ic"
......
...@@ -15,6 +15,76 @@ Created 9/30/1995 Heikki Tuuri ...@@ -15,6 +15,76 @@ Created 9/30/1995 Heikki Tuuri
typedef void* os_process_t; typedef void* os_process_t;
typedef unsigned long int os_process_id_t; typedef unsigned long int os_process_id_t;
/* The cell type in os_awe_allocate_mem page info */
#ifdef __NT__
typedef ULONG_PTR os_awe_t;
#else
typedef ulint os_awe_t;
#endif
/* Physical page size when Windows AWE is used. This is the normal
page size of an Intel x86 processor. We cannot use AWE with 2 MB or 4 MB
pages. */
#define OS_AWE_X86_PAGE_SIZE 4096
/********************************************************************
Windows AWE support. Tries to enable the "lock pages in memory" privilege for
the current process so that the current process can allocate memory-locked
virtual address space to act as the window where AWE maps physical memory. */
ibool
os_awe_enable_lock_pages_in_mem(void);
/*=================================*/
/* out: TRUE if success, FALSE if error;
prints error info to stderr if no success */
/********************************************************************
Allocates physical RAM memory up to 64 GB in an Intel 32-bit x86
processor. */
ibool
os_awe_allocate_physical_mem(
/*=========================*/
/* out: TRUE if success */
os_awe_t** page_info, /* out, own: array of opaque data containing
the info for allocated physical memory pages;
each allocated 4 kB physical memory page has
one slot of type os_awe_t in the array */
ulint n_megabytes); /* in: number of megabytes to allocate */
/********************************************************************
Allocates a window in the virtual address space where we can then map
pages of physical memory. */
byte*
os_awe_allocate_virtual_mem_window(
/*===============================*/
/* out, own: allocated memory, or NULL if did not
succeed */
ulint size); /* in: virtual memory allocation size in bytes, must
be < 2 GB */
/********************************************************************
With this function you can map parts of physical memory allocated with
the ..._allocate_physical_mem to the virtual address space allocated with
the previous function. Intel implements this so that the process page
tables are updated accordingly. A test on a 1.5 GHz AMD processor and XP
showed that this takes < 1 microsecond, much better than the estimated 80 us
for copying a 16 kB page memory to memory. But, the operation will at least
partially invalidate the translation lookaside buffer (TLB) of all
processors. Under a real-world load the performance hit may be bigger. */
ibool
os_awe_map_physical_mem_to_window(
/*==============================*/
/* out: TRUE if success; the function
calls exit(1) in case of an error */
byte* ptr, /* in: a page-aligned pointer to
somewhere in the virtual address
space window; we map the physical mem
pages here */
ulint n_mem_pages, /* in: number of 4 kB mem pages to
map */
os_awe_t* page_info); /* in: array of page infos for those
pages; each page has one slot in the
array */
/******************************************************************** /********************************************************************
Converts the current process id to a number. It is not guaranteed that the Converts the current process id to a number. It is not guaranteed that the
number is unique. In Linux returns the 'process number' of the current number is unique. In Linux returns the 'process number' of the current
......
...@@ -61,6 +61,7 @@ extern ulint srv_flush_log_at_trx_commit; ...@@ -61,6 +61,7 @@ extern ulint srv_flush_log_at_trx_commit;
extern byte srv_latin1_ordering[256];/* The sort order table of the latin1 extern byte srv_latin1_ordering[256];/* The sort order table of the latin1
character set */ character set */
extern ulint srv_pool_size; extern ulint srv_pool_size;
extern ulint srv_awe_window_size;
extern ulint srv_mem_pool_size; extern ulint srv_mem_pool_size;
extern ulint srv_lock_table_size; extern ulint srv_lock_table_size;
...@@ -86,6 +87,8 @@ extern ibool srv_use_doublewrite_buf; ...@@ -86,6 +87,8 @@ extern ibool srv_use_doublewrite_buf;
extern ibool srv_set_thread_priorities; extern ibool srv_set_thread_priorities;
extern int srv_query_thread_priority; extern int srv_query_thread_priority;
extern ibool srv_use_awe;
extern ibool srv_use_adaptive_hash_indexes;
/*-------------------------------------------*/ /*-------------------------------------------*/
extern ulint srv_n_rows_inserted; extern ulint srv_n_rows_inserted;
......
...@@ -438,24 +438,28 @@ log_group_calc_lsn_offset( ...@@ -438,24 +438,28 @@ log_group_calc_lsn_offset(
log_group_t* group) /* in: log group */ log_group_t* group) /* in: log group */
{ {
dulint gr_lsn; dulint gr_lsn;
ulint gr_lsn_size_offset; ib_longlong gr_lsn_size_offset;
ulint difference; ib_longlong difference;
ulint group_size; ib_longlong group_size;
ulint offset; ib_longlong offset;
ut_ad(mutex_own(&(log_sys->mutex))); ut_ad(mutex_own(&(log_sys->mutex)));
/* If total log file size is > 2 GB we can easily get overflows
with 32-bit integers. Use 64-bit integers instead. */
gr_lsn = group->lsn; gr_lsn = group->lsn;
gr_lsn_size_offset = log_group_calc_size_offset(group->lsn_offset, gr_lsn_size_offset = (ib_longlong)
group); log_group_calc_size_offset(group->lsn_offset, group);
group_size = log_group_get_capacity(group);
group_size = (ib_longlong) log_group_get_capacity(group);
if (ut_dulint_cmp(lsn, gr_lsn) >= 0) { if (ut_dulint_cmp(lsn, gr_lsn) >= 0) {
difference = ut_dulint_minus(lsn, gr_lsn); difference = (ib_longlong) ut_dulint_minus(lsn, gr_lsn);
} else { } else {
difference = ut_dulint_minus(gr_lsn, lsn); difference = (ib_longlong) ut_dulint_minus(gr_lsn, lsn);
difference = difference % group_size; difference = difference % group_size;
...@@ -464,7 +468,13 @@ log_group_calc_lsn_offset( ...@@ -464,7 +468,13 @@ log_group_calc_lsn_offset(
offset = (gr_lsn_size_offset + difference) % group_size; offset = (gr_lsn_size_offset + difference) % group_size;
return(log_group_calc_real_offset(offset, group)); ut_a(offset <= 0xFFFFFFFF);
/* printf("Offset is %lu gr_lsn_offset is %lu difference is %lu\n",
(ulint)offset,(ulint)gr_lsn_size_offset, (ulint)difference);
*/
return(log_group_calc_real_offset((ulint)offset, group));
} }
/*********************************************************************** /***********************************************************************
...@@ -3054,8 +3064,8 @@ log_check_log_recs( ...@@ -3054,8 +3064,8 @@ log_check_log_recs(
ut_memcpy(scan_buf, start, end - start); ut_memcpy(scan_buf, start, end - start);
recv_scan_log_recs(TRUE, recv_scan_log_recs(TRUE,
buf_pool_get_curr_size() - (buf_pool->n_frames -
RECV_POOL_N_FREE_BLOCKS * UNIV_PAGE_SIZE, recv_n_pool_free_frames) * UNIV_PAGE_SIZE,
FALSE, scan_buf, end - start, FALSE, scan_buf, end - start,
ut_dulint_align_down(buf_start_lsn, ut_dulint_align_down(buf_start_lsn,
OS_FILE_LOG_BLOCK_SIZE), OS_FILE_LOG_BLOCK_SIZE),
......
...@@ -71,6 +71,14 @@ ulint recv_previous_parsed_rec_is_multi = 0; ...@@ -71,6 +71,14 @@ ulint recv_previous_parsed_rec_is_multi = 0;
ulint recv_max_parsed_page_no = 0; ulint recv_max_parsed_page_no = 0;
/* This many frames must be left free in the buffer pool when we scan
the log and store the scanned log records in the buffer pool: we will
use these free frames to read in pages when we start applying the
log records to the database. */
ulint recv_n_pool_free_frames = 256;
/************************************************************ /************************************************************
Creates the recovery system. */ Creates the recovery system. */
...@@ -1018,10 +1026,10 @@ recv_recover_page( ...@@ -1018,10 +1026,10 @@ recv_recover_page(
block = buf_block_align(page); block = buf_block_align(page);
if (just_read_in) { if (just_read_in) {
/* Move the ownership of the x-latch on the page to this OS /* Move the ownership of the x-latch on the page to
thread, so that we can acquire a second x-latch on it. This this OS thread, so that we can acquire a second
is needed for the operations to the page to pass the debug x-latch on it. This is needed for the operations to
checks. */ the page to pass the debug checks. */
rw_lock_x_lock_move_ownership(&(block->lock)); rw_lock_x_lock_move_ownership(&(block->lock));
} }
...@@ -2362,8 +2370,8 @@ recv_group_scan_log_recs( ...@@ -2362,8 +2370,8 @@ recv_group_scan_log_recs(
group, start_lsn, end_lsn); group, start_lsn, end_lsn);
finished = recv_scan_log_recs(TRUE, finished = recv_scan_log_recs(TRUE,
buf_pool_get_curr_size() (buf_pool->n_frames
- RECV_POOL_N_FREE_BLOCKS * UNIV_PAGE_SIZE, - recv_n_pool_free_frames) * UNIV_PAGE_SIZE,
TRUE, log_sys->buf, TRUE, log_sys->buf,
RECV_SCAN_SIZE, start_lsn, RECV_SCAN_SIZE, start_lsn,
contiguous_lsn, group_scanned_lsn); contiguous_lsn, group_scanned_lsn);
...@@ -3001,8 +3009,8 @@ ask_again: ...@@ -3001,8 +3009,8 @@ ask_again:
read_offset % UNIV_PAGE_SIZE, len, buf, NULL); read_offset % UNIV_PAGE_SIZE, len, buf, NULL);
ret = recv_scan_log_recs(TRUE, ret = recv_scan_log_recs(TRUE,
buf_pool_get_curr_size() - (buf_pool->n_frames -
RECV_POOL_N_FREE_BLOCKS * UNIV_PAGE_SIZE, recv_n_pool_free_frames) * UNIV_PAGE_SIZE,
TRUE, buf, len, start_lsn, TRUE, buf, len, start_lsn,
&dummy_lsn, &scanned_lsn); &dummy_lsn, &scanned_lsn);
......
...@@ -2127,7 +2127,7 @@ os_aio_simulated_handle( ...@@ -2127,7 +2127,7 @@ os_aio_simulated_handle(
ulint offs; ulint offs;
ulint lowest_offset; ulint lowest_offset;
byte* combined_buf; byte* combined_buf;
byte* combined_buf2= 0; /* Remove warning */ byte* combined_buf2;
ibool ret; ibool ret;
ulint n; ulint n;
ulint i; ulint i;
......
...@@ -12,11 +12,469 @@ Created 9/30/1995 Heikki Tuuri ...@@ -12,11 +12,469 @@ Created 9/30/1995 Heikki Tuuri
#include "os0proc.ic" #include "os0proc.ic"
#endif #endif
#include "ut0mem.h"
#include "ut0byte.h"
/*
How to get AWE to compile on Windows?
-------------------------------------
The Visual C++ compiler has to be relatively recent and _WIN32_WINNT has to be
defined to a value >= 0x0500 when windows.h is included. An easy way
to accomplish that is to put
#define _WIN32_WINNT 0x0500
to the start of file \mysql\include\config-win.h
Where does AWE work?
-------------------
See the error message in os_awe_allocate_physical_mem().
How to assign privileges for mysqld to use AWE?
-----------------------------------------------
See the error message in os_awe_enable_lock_pages_in_mem().
Use Windows AWE functions in this order
---------------------------------------
(1) os_awe_enable_lock_pages_in_mem();
(2) os_awe_allocate_physical_mem();
(3) os_awe_allocate_virtual_mem_window();
(4) os_awe_map_physical_mem_to_window().
To test 'AWE' in a computer which does not have the AWE API,
you can compile with UNIV_SIMULATE_AWE defined in this file.
*/
#ifdef UNIV_SIMULATE_AWE
/* If we simulate AWE, we allocate the 'physical memory' here */
/* 4096-byte aligned start of the simulated 'physical memory' */
byte* os_awe_simulate_mem;
/* Size of the simulated 'physical memory' in bytes */
ulint os_awe_simulate_mem_size;
/* Page info array handed out by os_awe_allocate_physical_mem(); one slot
for each 4 kB page of the simulated 'physical memory' */
os_awe_t* os_awe_simulate_page_info;
/* 4096-byte aligned start of the simulated AWE window */
byte* os_awe_simulate_window;
/* Size of the simulated AWE window in bytes */
ulint os_awe_simulate_window_size;
/* In simulated AWE the following contains a NULL pointer or a pointer
to a mapped 'physical page' for each 4 kB page in the AWE window */
byte** os_awe_simulate_map;
#endif
#ifdef __NT__
/* Page info array filled in by os_awe_allocate_physical_mem(); used by
os_awe_map_physical_mem_to_window() to validate its page_info argument */
os_awe_t* os_awe_page_info;
/* Number of physical memory pages described by os_awe_page_info */
ulint os_awe_n_pages;
/* Start of the virtual address window reserved by
os_awe_allocate_virtual_mem_window() */
byte* os_awe_window;
/* Size of the virtual address window in bytes */
ulint os_awe_window_size;
#endif
/********************************************************************
Windows AWE support. Tries to enable the "lock pages in memory" privilege for
the current process so that the current process can allocate memory-locked
virtual address space to act as the window where AWE maps physical memory.
The process token that is opened for the adjustment is closed on every
return path. */

ibool
os_awe_enable_lock_pages_in_mem(void)
/*=================================*/
				/* out: TRUE if success, FALSE if error;
				prints error info to stderr if no success */
{
#ifdef UNIV_SIMULATE_AWE

	/* The simulation needs no operating system privilege */

	return(TRUE);

#elif defined(__NT__)
	struct {
		DWORD			Count;
		LUID_AND_ATTRIBUTES	Privilege[1];
	}	Info;
	HANDLE	hProcess;
	HANDLE	Token;
	BOOL	Result;
	DWORD	err;

	hProcess = GetCurrentProcess();

	/* Open the token of the current process */

	Result = OpenProcessToken(hProcess,
				TOKEN_ADJUST_PRIVILEGES,
				&Token);
	if (!Result) {
		fprintf(stderr,
"InnoDB: AWE: Cannot open process token, error %lu\n",
			(ulint)GetLastError());
		return(FALSE);
	}

	Info.Count = 1;
	Info.Privilege[0].Attributes = SE_PRIVILEGE_ENABLED;

	/* Get the local unique identifier (LUID) of the SE_LOCK_MEMORY
	privilege */

	Result = LookupPrivilegeValue(NULL, SE_LOCK_MEMORY_NAME,
					&(Info.Privilege[0].Luid));
	if (!Result) {
		/* Save the error code before CloseHandle() has a chance
		to overwrite it */
		err = GetLastError();
		CloseHandle(Token);

		fprintf(stderr,
"InnoDB: AWE: Cannot get local privilege value for %s, error %lu.\n",
			SE_LOCK_MEMORY_NAME, (ulint)err);
		return(FALSE);
	}

	/* Try to adjust the privilege */

	Result = AdjustTokenPrivileges(Token, FALSE,
					(PTOKEN_PRIVILEGES)&Info,
					0, NULL, NULL);

	/* The call may report success although the privilege was not
	granted; the last error code tells the difference. Save it and
	release the token, which is not needed any more. */

	err = GetLastError();
	CloseHandle(Token);

	/* Check the result */

	if (!Result) {
		fprintf(stderr,
"InnoDB: AWE: Cannot adjust process token privileges, error %lu.\n",
			(ulint)err);
		return(FALSE);
	} else if (err != ERROR_SUCCESS) {
		fprintf(stderr,
"InnoDB: AWE: Cannot enable SE_LOCK_MEMORY privilege, error %lu.\n"
"InnoDB: In Windows XP Home you cannot use AWE. In Windows 2000 and XP\n"
"InnoDB: Professional you must go to the Control Panel, to\n"
"InnoDB: Security Settings, to Local Policies, and enable\n"
"InnoDB: the 'lock pages in memory' privilege for the user who runs\n"
"InnoDB: the MySQL server.\n", (ulint)err);
		return(FALSE);
	}

	return(TRUE);
#else
#ifdef __WIN__
	fprintf(stderr,
"InnoDB: AWE: Error: to use AWE you must use a ...-nt MySQL executable.\n");
#endif
	return(FALSE);
#endif
}
#include "ut0mem.h" /********************************************************************
Allocates physical RAM memory up to 64 GB in an Intel 32-bit x86
processor. */
ibool
os_awe_allocate_physical_mem(
/*=========================*/
				/* out: TRUE if success; in the Windows
				branch a failure releases everything this
				function allocated */
	os_awe_t** page_info,	/* out, own: array of opaque data containing
				the info for allocated physical memory pages;
				each allocated 4 kB physical memory page has
				one slot of type os_awe_t in the array; in
				the Windows branch set to NULL if the array
				had to be freed because of an error */
	ulint	  n_megabytes)	/* in: number of megabytes to allocate */
{
#ifdef UNIV_SIMULATE_AWE
	os_awe_simulate_page_info = ut_malloc(sizeof(os_awe_t) *
		n_megabytes * ((1024 * 1024) / OS_AWE_X86_PAGE_SIZE));

	/* Allocate one extra page so that the start can be aligned */

	os_awe_simulate_mem = ut_align(ut_malloc(
					4096 + 1024 * 1024 * n_megabytes),
					4096);
	os_awe_simulate_mem_size = n_megabytes * 1024 * 1024;

	*page_info = os_awe_simulate_page_info;

	return(TRUE);

#elif defined(__NT__)
	BOOL		bResult;
	ULONG_PTR	NumberOfPages;	/* in/out count for the Windows
					calls; ULONG_PTR is an unsigned
					integer as wide as a pointer */
	ULONG_PTR	NumberOfPagesInitial;
	SYSTEM_INFO	sSysInfo;
	int		PFNArraySize;
	DWORD		err;

	if (n_megabytes > 64 * 1024) {

		fprintf(stderr,
"InnoDB: AWE: Error: tried to allocate %lu MB.\n"
"InnoDB: AWE cannot allocate more than 64 GB in any computer.\n", n_megabytes);

		return(FALSE);
	}

	GetSystemInfo(&sSysInfo);  /* fill the system information structure */

	if ((ulint)OS_AWE_X86_PAGE_SIZE != (ulint)sSysInfo.dwPageSize) {
		fprintf(stderr,
"InnoDB: AWE: Error: this computer has a page size of %lu.\n"
"InnoDB: Should be 4096 bytes for InnoDB AWE support to work.\n",
			(ulint)sSysInfo.dwPageSize);

		return(FALSE);
	}

	/* Enable this process' privilege to lock pages to physical memory.
	We do this before allocating anything, so that a failure here needs
	no cleanup. */

	if (!os_awe_enable_lock_pages_in_mem()) {

		return(FALSE);
	}

	/* Calculate the number of pages of memory to request */

	NumberOfPages = n_megabytes * ((1024 * 1024) / OS_AWE_X86_PAGE_SIZE);
	NumberOfPagesInitial = NumberOfPages;

	/* Calculate the size of page_info for allocated physical pages */

	PFNArraySize = NumberOfPages * sizeof(ULONG_PTR);

	*page_info = (ULONG_PTR*)HeapAlloc(GetProcessHeap(), 0, PFNArraySize);

	if (*page_info == NULL) {
		fprintf(stderr,
"InnoDB: AWE: Failed to allocate page info array from process heap, error %lu\n",
			(ulint)GetLastError());

		return(FALSE);
	}

	/* Allocate the physical memory */

	/* Compilation note: if the compiler complains the function is not
	defined, see the note at the start of this file */

	bResult = AllocateUserPhysicalPages(GetCurrentProcess(),
						&NumberOfPages,
						*page_info);
	if (!bResult) {
		/* Save the error code before the cleanup can change it */
		err = GetLastError();

		HeapFree(GetProcessHeap(), 0, *page_info);
		*page_info = NULL;

		fprintf(stderr,
"InnoDB: AWE: Cannot allocate physical pages, error %lu.\n",
			(ulint)err);

		return(FALSE);
	}

	if (NumberOfPagesInitial != NumberOfPages) {
		fprintf(stderr,
"InnoDB: AWE: Error: allocated only %lu pages of %lu requested.\n"
"InnoDB: Check that you have enough free RAM.\n"
"InnoDB: In Windows XP Professional and 2000 Professional\n"
"InnoDB: Windows PAE size is max 4 GB. In 2000 and .NET\n"
"InnoDB: Advanced Servers and 2000 Datacenter Server it is 32 GB,\n"
"InnoDB: and in .NET Datacenter Server it is 64 GB.\n"
"InnoDB: A Microsoft web page said that the processor must be an Intel\n"
"InnoDB: processor.\n",
			(ulint)NumberOfPages,
			(ulint)NumberOfPagesInitial);

		/* Give back the pages we did get, and the array that
		describes them; the caller cannot use a partial allocation */

		FreeUserPhysicalPages(GetCurrentProcess(),
						&NumberOfPages,
						*page_info);
		HeapFree(GetProcessHeap(), 0, *page_info);
		*page_info = NULL;

		return(FALSE);
	}

	/* Publish the allocation only now that it is complete */

	os_awe_page_info = *page_info;
	os_awe_n_pages = (ulint)NumberOfPages;

	ut_total_allocated_memory += PFNArraySize;

	fprintf(stderr,
"InnoDB: Using Address Windowing Extensions (AWE); allocated %lu MB\n",
		n_megabytes);

	return(TRUE);
#else
	return(FALSE);
#endif
}
/********************************************************************
Allocates a window in the virtual address space where we can map the
pages of physical memory. */

byte*
os_awe_allocate_virtual_mem_window(
/*===============================*/
			/* out, own: allocated memory, or NULL if did not
			succeed */
	ulint	size)	/* in: virtual memory allocation size in bytes, must
			be < 2 GB */
{
#ifdef UNIV_SIMULATE_AWE
	ulint	i;

	/* Allocate one extra page so that the start can be aligned */

	os_awe_simulate_window = ut_align(ut_malloc(4096 + size), 4096);
	os_awe_simulate_window_size = size;

	os_awe_simulate_map = ut_malloc(sizeof(byte*) * (size / 4096));

	/* Nothing is mapped to the window yet */

	for (i = 0; i < (size / 4096); i++) {
		*(os_awe_simulate_map + i) = NULL;
	}

	return(os_awe_simulate_window);

#elif defined(__NT__)
	byte*	ptr;

	/* Reject sizes of 2 GB or more: 0x7FFFFFFF is 2 GB - 1 */

	if (size > 0x7FFFFFFFUL) {
		fprintf(stderr,
"InnoDB: AWE: Cannot allocate %lu bytes of virtual memory\n", size);

		return(NULL);
	}

	ptr = VirtualAlloc(NULL, (SIZE_T)size, MEM_RESERVE | MEM_PHYSICAL,
							PAGE_READWRITE);
	if (ptr == NULL) {
		fprintf(stderr,
"InnoDB: AWE: Cannot allocate %lu bytes of virtual memory, error %lu\n",
		size, (ulint)GetLastError());

		return(NULL);
	}

	/* Remember the window so that later mappings can be validated */

	os_awe_window = ptr;
	os_awe_window_size = size;

	ut_total_allocated_memory += size;

	return(ptr);
#else
	return(NULL);
#endif
}
/********************************************************************
With this function you can map parts of physical memory allocated with
the ..._allocate_physical_mem to the virtual address space allocated with
the previous function. Intel implements this so that the process page
tables are updated accordingly. A test on a 1.5 GHz AMD processor and XP
showed that this takes < 1 microsecond, much better than the estimated 80 us
for copying a 16 kB page memory to memory. But, the operation will at least
partially invalidate the translation lookaside buffer (TLB) of all
processors. Under a real-world load the performance hit may be bigger.
The whole range of n_mem_pages pages is checked to lie inside both the
window and the page info array before anything is mapped. */

ibool
os_awe_map_physical_mem_to_window(
/*==============================*/
					/* out: TRUE if success; the function
					calls exit(1) in case of an error */
	byte*		ptr,		/* in: a page-aligned pointer to
					somewhere in the virtual address
					space window; we map the physical mem
					pages here */
	ulint		n_mem_pages,	/* in: number of 4 kB mem pages to
					map */
	os_awe_t*	page_info)	/* in: array of page infos for those
					pages; each page has one slot in the
					array */
{
#ifdef UNIV_SIMULATE_AWE
	ulint	i;
	byte**	map;
	byte*	page;
	byte*	phys_page;

	/* The mapped range must fit in the window ... */

	ut_a(ptr >= os_awe_simulate_window);
	ut_a(ptr < os_awe_simulate_window + os_awe_simulate_window_size);
	ut_a(n_mem_pages <= os_awe_simulate_window_size / 4096);
	ut_a((ulint)(ptr - os_awe_simulate_window)
		<= os_awe_simulate_window_size - n_mem_pages * 4096);

	/* ... and the page infos must fit in the page info array */

	ut_a(page_info >= os_awe_simulate_page_info);
	ut_a(page_info < os_awe_simulate_page_info +
			 		(os_awe_simulate_mem_size / 4096));
	ut_a(n_mem_pages <= os_awe_simulate_mem_size / 4096);
	ut_a((ulint)(page_info - os_awe_simulate_page_info)
		<= os_awe_simulate_mem_size / 4096 - n_mem_pages);

	/* First look if some other 'physical pages' are mapped at ptr,
	and copy them back to where they were if yes */

	map = os_awe_simulate_map
			+ ((ulint)(ptr - os_awe_simulate_window)) / 4096;
	page = ptr;

	for (i = 0; i < n_mem_pages; i++) {
		if (*map != NULL) {
			ut_memcpy(*map, page, 4096);
		}
		map++;
		page += 4096;
	}

	/* Then copy to ptr the 'physical pages' determined by page_info; we
	assume page_info is a segment of the array we created at the start */

	phys_page = os_awe_simulate_mem
			+ (ulint)(page_info - os_awe_simulate_page_info)
			  * 4096;

	ut_memcpy(ptr, phys_page, n_mem_pages * 4096);

	/* Update the map */

	map = os_awe_simulate_map
			+ ((ulint)(ptr - os_awe_simulate_window)) / 4096;

	for (i = 0; i < n_mem_pages; i++) {
		*map = phys_page;

		map++;
		phys_page += 4096;
	}

	return(TRUE);

#elif defined(__NT__)
	BOOL		bResult;
	ULONG_PTR	n_pages;

	n_pages = (ULONG_PTR)n_mem_pages;

	if (!(ptr >= os_awe_window)) {
		fprintf(stderr,
"InnoDB: AWE: Error: trying to map to address %lx but AWE window start %lx\n",
		(ulint)ptr, (ulint)os_awe_window);
		ut_a(0);
	}

	/* The last mapped byte must still be inside the window. The
	comparison is written with subtractions that cannot wrap around. */

	if (n_mem_pages > os_awe_window_size / OS_AWE_X86_PAGE_SIZE
	    || (ulint)(ptr - os_awe_window)
	       > os_awe_window_size - n_mem_pages * OS_AWE_X86_PAGE_SIZE) {
		fprintf(stderr,
"InnoDB: AWE: Error: trying to map to address %lx but AWE window end %lx\n",
		(ulint)ptr, (ulint)os_awe_window + os_awe_window_size);
		ut_a(0);
	}

	if (!(page_info >= os_awe_page_info)) {
		fprintf(stderr,
"InnoDB: AWE: Error: trying to map page info at %lx but array start %lx\n",
		(ulint)page_info, (ulint)os_awe_page_info);
		ut_a(0);
	}

	/* All n_mem_pages slots must be inside the page info array */

	if (n_mem_pages > os_awe_n_pages
	    || (ulint)(page_info - os_awe_page_info)
	       > os_awe_n_pages - n_mem_pages) {
		fprintf(stderr,
"InnoDB: AWE: Error: trying to map page info at %lx but array end %lx\n",
		(ulint)page_info, (ulint)(os_awe_page_info + os_awe_n_pages));
		ut_a(0);
	}

	bResult = MapUserPhysicalPages((PVOID)ptr, n_pages, page_info);

	if (!bResult) {
		ut_print_timestamp(stderr);
		fprintf(stderr,
"  InnoDB: AWE: Mapping of %lu physical pages to address %lx failed,\n"
"InnoDB: error %lu.\n"
"InnoDB: Cannot continue operation.\n",
			n_mem_pages, (ulint)ptr, (ulint)GetLastError());
		exit(1);
	}

	return(TRUE);
#else
	return(FALSE);
#endif
}
/******************************************************************** /********************************************************************
Converts the current process id to a number. It is not guaranteed that the Converts the current process id to a number. It is not guaranteed that the
......
...@@ -140,9 +140,14 @@ byte srv_latin1_ordering[256] /* The sort order table of the latin1 ...@@ -140,9 +140,14 @@ byte srv_latin1_ordering[256] /* The sort order table of the latin1
, 0xD8, 0x55, 0x55, 0x55, 0x59, 0x59, 0xDE, 0xFF , 0xD8, 0x55, 0x55, 0x55, 0x59, 0x59, 0xDE, 0xFF
}; };
ulint srv_pool_size = ULINT_MAX; /* size in database pages; ulint srv_pool_size = ULINT_MAX; /* size in pages; MySQL inits
MySQL originally sets this this to size in kilobytes but
value in megabytes */ we normalize this to pages in
srv_boot() */
ulint srv_awe_window_size = 0; /* size in pages; MySQL inits
this to bytes, but we
normalize it to pages in
srv_boot() */
ulint srv_mem_pool_size = ULINT_MAX; /* size in bytes */ ulint srv_mem_pool_size = ULINT_MAX; /* size in bytes */
ulint srv_lock_table_size = ULINT_MAX; ulint srv_lock_table_size = ULINT_MAX;
...@@ -218,6 +223,13 @@ ibool srv_use_doublewrite_buf = TRUE; ...@@ -218,6 +223,13 @@ ibool srv_use_doublewrite_buf = TRUE;
ibool srv_set_thread_priorities = TRUE; ibool srv_set_thread_priorities = TRUE;
int srv_query_thread_priority = 0; int srv_query_thread_priority = 0;
/* TRUE if the Address Windowing Extensions of Windows are used; then we must
disable adaptive hash indexes */
ibool srv_use_awe = FALSE;
ibool srv_use_adaptive_hash_indexes = TRUE;
/*-------------------------------------------*/ /*-------------------------------------------*/
ulint srv_n_spin_wait_rounds = 20; ulint srv_n_spin_wait_rounds = 20;
ulint srv_spin_wait_delay = 5; ulint srv_spin_wait_delay = 5;
...@@ -1956,9 +1968,19 @@ srv_normalize_init_values(void) ...@@ -1956,9 +1968,19 @@ srv_normalize_init_values(void)
srv_log_buffer_size = srv_log_buffer_size / UNIV_PAGE_SIZE; srv_log_buffer_size = srv_log_buffer_size / UNIV_PAGE_SIZE;
srv_pool_size = srv_pool_size / UNIV_PAGE_SIZE; srv_pool_size = srv_pool_size / (UNIV_PAGE_SIZE / 1024);
srv_awe_window_size = srv_awe_window_size / UNIV_PAGE_SIZE;
if (srv_use_awe) {
/* If we are using AWE we must save memory in the 32-bit
address space of the process, and cannot bind the lock
table size to the real buffer pool size. */
srv_lock_table_size = 20 * srv_awe_window_size;
} else {
srv_lock_table_size = 20 * srv_pool_size; srv_lock_table_size = 20 * srv_pool_size;
}
return(DB_SUCCESS); return(DB_SUCCESS);
} }
...@@ -2323,6 +2345,12 @@ srv_sprintf_innodb_monitor( ...@@ -2323,6 +2345,12 @@ srv_sprintf_innodb_monitor(
"Total memory allocated %lu; in additional pool allocated %lu\n", "Total memory allocated %lu; in additional pool allocated %lu\n",
ut_total_allocated_memory, ut_total_allocated_memory,
mem_pool_get_reserved(mem_comm_pool)); mem_pool_get_reserved(mem_comm_pool));
if (srv_use_awe) {
buf += sprintf(buf,
"In addition to that %lu MB of AWE memory allocated\n",
srv_pool_size / ((1024 * 1024) / UNIV_PAGE_SIZE));
}
buf_print_io(buf, buf_end); buf_print_io(buf, buf_end);
buf = buf + strlen(buf); buf = buf + strlen(buf);
ut_a(buf < buf_end + 1500); ut_a(buf < buf_end + 1500);
......
...@@ -935,6 +935,7 @@ innobase_start_or_create_for_mysql(void) ...@@ -935,6 +935,7 @@ innobase_start_or_create_for_mysql(void)
/*====================================*/ /*====================================*/
/* out: DB_SUCCESS or error code */ /* out: DB_SUCCESS or error code */
{ {
buf_pool_t* ret;
ibool create_new_db; ibool create_new_db;
ibool log_file_created; ibool log_file_created;
ibool log_created = FALSE; ibool log_created = FALSE;
...@@ -970,6 +971,11 @@ innobase_start_or_create_for_mysql(void) ...@@ -970,6 +971,11 @@ innobase_start_or_create_for_mysql(void)
#ifdef UNIV_MEM_DEBUG #ifdef UNIV_MEM_DEBUG
fprintf(stderr, fprintf(stderr,
"InnoDB: !!!!!!!!!!!!!! UNIV_MEM_DEBUG switched on !!!!!!!!!!!!!!!\n"); "InnoDB: !!!!!!!!!!!!!! UNIV_MEM_DEBUG switched on !!!!!!!!!!!!!!!\n");
#endif
#ifdef UNIV_SIMULATE_AWE
fprintf(stderr,
"InnoDB: !!!!!!!!!!!!!! UNIV_SIMULATE_AWE switched on !!!!!!!!!!!!!!!!!\n");
#endif #endif
if (srv_sizeof_trx_t_in_ha_innodb_cc != (ulint)sizeof(trx_t)) { if (srv_sizeof_trx_t_in_ha_innodb_cc != (ulint)sizeof(trx_t)) {
...@@ -1002,6 +1008,17 @@ innobase_start_or_create_for_mysql(void) ...@@ -1002,6 +1008,17 @@ innobase_start_or_create_for_mysql(void)
srv_startup_is_before_trx_rollback_phase = TRUE; srv_startup_is_before_trx_rollback_phase = TRUE;
os_aio_use_native_aio = FALSE; os_aio_use_native_aio = FALSE;
#if !defined(__NT__) && !defined(UNIV_SIMULATE_AWE)
if (srv_use_awe) {
fprintf(stderr,
"InnoDB: Error: You have specified innodb_buffer_pool_awe_mem_mb\n"
"InnoDB: in my.cnf, but AWE can only be used in Windows 2000 and later.\n");
return(DB_ERROR);
}
#endif
#ifdef __WIN__ #ifdef __WIN__
if (os_get_os_version() == OS_WIN95 if (os_get_os_version() == OS_WIN95
|| os_get_os_version() == OS_WIN31 || os_get_os_version() == OS_WIN31
...@@ -1057,6 +1074,9 @@ innobase_start_or_create_for_mysql(void) ...@@ -1057,6 +1074,9 @@ innobase_start_or_create_for_mysql(void)
return(DB_ERROR); return(DB_ERROR);
} }
/* Note that the call srv_boot() also changes the values of
srv_pool_size etc. to the units used by InnoDB internally */
err = srv_boot(); err = srv_boot();
if (err != DB_SUCCESS) { if (err != DB_SUCCESS) {
...@@ -1088,7 +1108,26 @@ innobase_start_or_create_for_mysql(void) ...@@ -1088,7 +1108,26 @@ innobase_start_or_create_for_mysql(void)
fil_init(SRV_MAX_N_OPEN_FILES); fil_init(SRV_MAX_N_OPEN_FILES);
buf_pool_init(srv_pool_size, srv_pool_size); if (srv_use_awe) {
fprintf(stderr,
"InnoDB: Using AWE: Memory window is %lu MB and AWE memory is %lu MB\n",
srv_awe_window_size / ((1024 * 1024) / UNIV_PAGE_SIZE),
srv_pool_size / ((1024 * 1024) / UNIV_PAGE_SIZE));
/* We must disable adaptive hash indexes because they do not
tolerate remapping of pages in AWE */
srv_use_adaptive_hash_indexes = FALSE;
ret = buf_pool_init(srv_pool_size, srv_pool_size,
srv_awe_window_size);
} else {
ret = buf_pool_init(srv_pool_size, srv_pool_size,
srv_pool_size);
}
if (ret == NULL) {
return(DB_ERROR);
}
fsp_init(); fsp_init();
log_init(); log_init();
......
...@@ -472,9 +472,9 @@ trx_sys_update_mysql_binlog_offset( ...@@ -472,9 +472,9 @@ trx_sys_update_mysql_binlog_offset(
if (0 != ut_memcmp(sys_header + field + TRX_SYS_MYSQL_LOG_NAME, if (0 != ut_memcmp(sys_header + field + TRX_SYS_MYSQL_LOG_NAME,
file_name, 1 + ut_strlen(file_name))) { file_name, 1 + ut_strlen(file_name))) {
mlog_write_string((byte*) (sys_header + field mlog_write_string(sys_header + field
+ TRX_SYS_MYSQL_LOG_NAME), + TRX_SYS_MYSQL_LOG_NAME,
(byte*) file_name, 1 + ut_strlen(file_name), mtr); file_name, 1 + ut_strlen(file_name), mtr);
} }
if (mach_read_from_4(sys_header + field if (mach_read_from_4(sys_header + field
......
...@@ -99,7 +99,7 @@ trx_create( ...@@ -99,7 +99,7 @@ trx_create(
trx->mysql_log_file_name = NULL; trx->mysql_log_file_name = NULL;
trx->mysql_log_offset = 0; trx->mysql_log_offset = 0;
trx->mysql_master_log_file_name = (char*) ""; trx->mysql_master_log_file_name = "";
trx->mysql_master_log_pos = 0; trx->mysql_master_log_pos = 0;
trx->ignore_duplicates_in_insert = FALSE; trx->ignore_duplicates_in_insert = FALSE;
......
...@@ -197,6 +197,7 @@ ut_get_year_month_day( ...@@ -197,6 +197,7 @@ ut_get_year_month_day(
*month = (ulint)cal_tm.wMonth; *month = (ulint)cal_tm.wMonth;
*day = (ulint)cal_tm.wDay; *day = (ulint)cal_tm.wDay;
#else #else
struct tm cal_tm;
struct tm* cal_tm_ptr; struct tm* cal_tm_ptr;
time_t tm; time_t tm;
......
...@@ -82,7 +82,8 @@ are declared in mysqld.cc: */ ...@@ -82,7 +82,8 @@ are declared in mysqld.cc: */
long innobase_mirrored_log_groups, innobase_log_files_in_group, long innobase_mirrored_log_groups, innobase_log_files_in_group,
innobase_log_file_size, innobase_log_buffer_size, innobase_log_file_size, innobase_log_buffer_size,
innobase_buffer_pool_size, innobase_additional_mem_pool_size, innobase_buffer_pool_size, innobase_buffer_pool_awe_mem_mb,
innobase_additional_mem_pool_size,
innobase_file_io_threads, innobase_lock_wait_timeout, innobase_file_io_threads, innobase_lock_wait_timeout,
innobase_thread_concurrency, innobase_force_recovery; innobase_thread_concurrency, innobase_force_recovery;
...@@ -753,7 +754,25 @@ innobase_init(void) ...@@ -753,7 +754,25 @@ innobase_init(void)
srv_log_buffer_size = (ulint) innobase_log_buffer_size; srv_log_buffer_size = (ulint) innobase_log_buffer_size;
srv_flush_log_at_trx_commit = (ulint) innobase_flush_log_at_trx_commit; srv_flush_log_at_trx_commit = (ulint) innobase_flush_log_at_trx_commit;
srv_pool_size = (ulint) innobase_buffer_pool_size; /* We set srv_pool_size here in units of 1 kB. InnoDB internally
changes the value so that it becomes the number of database pages. */
if (innobase_buffer_pool_awe_mem_mb == 0) {
/* Careful here: we first convert the signed long int to ulint
and only after that divide */
srv_pool_size = ((ulint) innobase_buffer_pool_size) / 1024;
} else {
srv_use_awe = TRUE;
srv_pool_size = (ulint)
(1024 * innobase_buffer_pool_awe_mem_mb);
srv_awe_window_size = (ulint) innobase_buffer_pool_size;
/* Note that what the user specified as
innodb_buffer_pool_size is actually the AWE memory window
size in this case, and the real buffer pool size is
determined by .._awe_mem_mb. */
}
srv_mem_pool_size = (ulint) innobase_additional_mem_pool_size; srv_mem_pool_size = (ulint) innobase_additional_mem_pool_size;
......
...@@ -178,7 +178,8 @@ extern char *innobase_home, *innobase_tmpdir, *innobase_logdir; ...@@ -178,7 +178,8 @@ extern char *innobase_home, *innobase_tmpdir, *innobase_logdir;
extern long innobase_lock_scan_time; extern long innobase_lock_scan_time;
extern long innobase_mirrored_log_groups, innobase_log_files_in_group; extern long innobase_mirrored_log_groups, innobase_log_files_in_group;
extern long innobase_log_file_size, innobase_log_buffer_size; extern long innobase_log_file_size, innobase_log_buffer_size;
extern long innobase_buffer_pool_size, innobase_additional_mem_pool_size; extern long innobase_buffer_pool_size, innobase_buffer_pool_awe_mem_mb,
innobase_additional_mem_pool_size;
extern long innobase_file_io_threads, innobase_lock_wait_timeout; extern long innobase_file_io_threads, innobase_lock_wait_timeout;
extern long innobase_force_recovery, innobase_thread_concurrency; extern long innobase_force_recovery, innobase_thread_concurrency;
extern char *innobase_data_home_dir, *innobase_data_file_path; extern char *innobase_data_home_dir, *innobase_data_file_path;
......
...@@ -3194,6 +3194,7 @@ enum options { ...@@ -3194,6 +3194,7 @@ enum options {
OPT_INNODB_LOG_FILE_SIZE, OPT_INNODB_LOG_FILE_SIZE,
OPT_INNODB_LOG_BUFFER_SIZE, OPT_INNODB_LOG_BUFFER_SIZE,
OPT_INNODB_BUFFER_POOL_SIZE, OPT_INNODB_BUFFER_POOL_SIZE,
OPT_INNODB_BUFFER_POOL_AWE_MEM_MB,
OPT_INNODB_ADDITIONAL_MEM_POOL_SIZE, OPT_INNODB_ADDITIONAL_MEM_POOL_SIZE,
OPT_INNODB_FILE_IO_THREADS, OPT_INNODB_FILE_IO_THREADS,
OPT_INNODB_LOCK_WAIT_TIMEOUT, OPT_INNODB_LOCK_WAIT_TIMEOUT,
...@@ -3753,6 +3754,10 @@ struct my_option my_long_options[] = ...@@ -3753,6 +3754,10 @@ struct my_option my_long_options[] =
"The size of the memory buffer InnoDB uses to cache data and indexes of its tables.", "The size of the memory buffer InnoDB uses to cache data and indexes of its tables.",
(gptr*) &innobase_buffer_pool_size, (gptr*) &innobase_buffer_pool_size, 0, (gptr*) &innobase_buffer_pool_size, (gptr*) &innobase_buffer_pool_size, 0,
GET_LONG, REQUIRED_ARG, 8*1024*1024L, 1024*1024L, ~0L, 0, 1024*1024L, 0}, GET_LONG, REQUIRED_ARG, 8*1024*1024L, 1024*1024L, ~0L, 0, 1024*1024L, 0},
{"innodb_buffer_pool_awe_mem_mb", OPT_INNODB_BUFFER_POOL_AWE_MEM_MB,
"If Windows AWE is used, the size of InnoDB buffer pool allocated from the AWE memory.",
(gptr*) &innobase_buffer_pool_awe_mem_mb, (gptr*) &innobase_buffer_pool_awe_mem_mb, 0,
GET_LONG, REQUIRED_ARG, 0, 0, 63000, 0, 1, 0},
{"innodb_additional_mem_pool_size", OPT_INNODB_ADDITIONAL_MEM_POOL_SIZE, {"innodb_additional_mem_pool_size", OPT_INNODB_ADDITIONAL_MEM_POOL_SIZE,
"Size of a memory pool InnoDB uses to store data dictionary information and other internal data structures.", "Size of a memory pool InnoDB uses to store data dictionary information and other internal data structures.",
(gptr*) &innobase_additional_mem_pool_size, (gptr*) &innobase_additional_mem_pool_size,
......
...@@ -449,6 +449,7 @@ struct show_var_st init_vars[]= { ...@@ -449,6 +449,7 @@ struct show_var_st init_vars[]= {
#ifdef HAVE_INNOBASE_DB #ifdef HAVE_INNOBASE_DB
{"innodb_additional_mem_pool_size", (char*) &innobase_additional_mem_pool_size, SHOW_LONG }, {"innodb_additional_mem_pool_size", (char*) &innobase_additional_mem_pool_size, SHOW_LONG },
{"innodb_buffer_pool_size", (char*) &innobase_buffer_pool_size, SHOW_LONG }, {"innodb_buffer_pool_size", (char*) &innobase_buffer_pool_size, SHOW_LONG },
{"innodb_buffer_pool_awe_mem_mb", (char*) &innobase_buffer_pool_awe_mem_mb, SHOW_LONG },
{"innodb_data_file_path", (char*) &innobase_data_file_path, SHOW_CHAR_PTR}, {"innodb_data_file_path", (char*) &innobase_data_file_path, SHOW_CHAR_PTR},
{"innodb_data_home_dir", (char*) &innobase_data_home_dir, SHOW_CHAR_PTR}, {"innodb_data_home_dir", (char*) &innobase_data_home_dir, SHOW_CHAR_PTR},
{"innodb_file_io_threads", (char*) &innobase_file_io_threads, SHOW_LONG }, {"innodb_file_io_threads", (char*) &innobase_file_io_threads, SHOW_LONG },
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment