Commit ec06c782 authored by marko@hundin.mysql.fi

Remove unnecessary files and functions

parent 38b46781
/**********************************************************************
File-space management
(c) 1995 Innobase Oy
Created 11/29/1995 Heikki Tuuri
***********************************************************************/
#include "fsp0fsp.h"
#include "buf0buf.h"
#include "fil0fil.h"
#include "sync0sync.h"
#include "mtr0log.h"
#include "fut0fut.h"
#include "ut0byte.h"
/* The data structures in files are defined just as byte strings in C:
each type below is a plain byte, and the fields of a structure are reached
by adding one of the offset macros defined in this file to a pointer of
that type */
typedef byte fsp_header_t; /* file space header; fields at FSP_... offsets */
typedef byte xdes_t; /* extent descriptor; fields at XDES_... offsets */
typedef byte fseg_page_header_t; /* NOTE(review): not referenced in the
visible part of this file; presumably a
segment header page -- confirm */
/* Rw-latch protecting the whole file space system; created in fsp_init */
rw_lock_t fsp_latch;
/* SPACE HEADER
============
File space header data structure: this data structure
is contained in the first page of a space. The space for this header
is reserved in every extent descriptor page, but used only in the first.
All FSP_... field offsets below are relative to the start of the header,
i.e. to FSP_HEADER_OFFSET within the page. */
#define FSP_HEADER_OFFSET FIL_PAGE_DATA /* Offset of the space header
within a file page */
/*-------------------------------------*/
#define FSP_SIZE 0 /* Current
size of the space in pages */
#define FSP_FREE_LIMIT 4 /* Minimum page number for which
the free list has not been initialized:
the pages >= this limit are, by
definition, free */
#define FSP_LOWEST_NO_WRITE 8 /* The lowest page offset for which
the page has not been written to disk
(if it has been written, we know
that the OS has really reserved
the physical space for the page) */
#define FSP_FRAG_N_USED 12 /* number of used pages in
the FSP_FREE_FRAG list */
#define FSP_FREE 16 /* list of free extents */
#define FSP_FREE_FRAG (16 + FLST_BASE_NODE_SIZE)
/* list of partially free extents not
belonging to any segment */
#define FSP_FULL_FRAG (16 + 2 * FLST_BASE_NODE_SIZE)
/* list of full extents not belonging
to any segment */
#define FSP_SEG_ID (16 + 3 * FLST_BASE_NODE_SIZE)
/* 8 bytes which give the first
unused segment id */
#define FSP_SEG_HDRS_FULL (24 + 3 * FLST_BASE_NODE_SIZE)
/* list of pages containing segment
headers, where all the segment header
slots are reserved */
#define FSP_SEG_HDRS_FREE (24 + 4 * FLST_BASE_NODE_SIZE)
/* list of pages containing segment
headers, where not all the segment
header slots are reserved */
/*-------------------------------------*/
/* File space header size: 4 four-byte fields, the 8-byte segment id and
five list base nodes (FSP_FREE, FSP_FREE_FRAG, FSP_FULL_FRAG,
FSP_SEG_HDRS_FULL, FSP_SEG_HDRS_FREE). The size must cover the last base
node, because the extent descriptor array starts right after the header. */
#define FSP_HEADER_SIZE (24 + 5 * FLST_BASE_NODE_SIZE)
#define FSP_FREE_ADD 4 /* this many free extents are added
to the free list from above
FSP_FREE_LIMIT at a time */
/* SEGMENT HEADER
==============
Segment header which is created for each segment in a tablespace, on a
page of its own. NOTE: in purge we assume that a segment having only one
currently used page can be freed in a few steps, so that the freeing cannot
fill the file buffer with bufferfixed file pages.
The first two macros are offsets within a segment header page; the
FSEG_... field offsets after the separator are relative to the start of
one segment header in the array on that page. */
#define FSEG_HDR_PAGE_NODE FSEG_PAGE_DATA
/* the list node for linking
segment header pages */
#define FSEG_ARR_OFFSET (FSEG_PAGE_DATA + FLST_NODE_SIZE)
/* offset of the array of segment
headers on a segment header page */
/*-------------------------------------*/
#define FSEG_ID 0 /* 8 bytes of segment id: if this is
ut_dulint_zero, it means that the
header is unused */
#define FSEG_NOT_FULL_N_USED 8
/* number of used segment pages in
the FSEG_NOT_FULL list */
#define FSEG_FREE 12
/* list of free extents of this
segment */
#define FSEG_NOT_FULL (12 + FLST_BASE_NODE_SIZE)
/* list of partially free extents */
#define FSEG_FULL (12 + 2 * FLST_BASE_NODE_SIZE)
/* list of full extents */
#define FSEG_MAGIC_N (12 + 3 * FLST_BASE_NODE_SIZE)
/* magic number used in debugging:
4 bytes, see FSEG_MAGIC_N_VALUE */
#define FSEG_FRAG_ARR (16 + 3 * FLST_BASE_NODE_SIZE)
/* array of individual pages
belonging to this segment in fsp
fragment extent lists */
#define FSEG_FRAG_ARR_N_SLOTS (FSP_EXTENT_SIZE / 2)
/* number of slots in the array for
the fragment pages */
#define FSEG_FRAG_SLOT_SIZE 4 /* a fragment page slot contains its
page number within space, FIL_NULL
means that the slot is not in use */
/*-------------------------------------*/
/* Size of one segment header in bytes: the fixed fields and list base
nodes up to FSEG_FRAG_ARR, followed by the fragment page slot array */
#define FSEG_HEADER_SIZE (16 + 3 * FLST_BASE_NODE_SIZE +\
FSEG_FRAG_ARR_N_SLOTS * FSEG_FRAG_SLOT_SIZE)
#define FSP_SEG_HDRS_PER_PAGE ((UNIV_PAGE_SIZE - FSEG_ARR_OFFSET - 10)\
/ FSEG_HEADER_SIZE)
/* Number of segment headers which fit on a
single page */
/* Value expected in the FSEG_MAGIC_N field of a segment header that is
in use (asserted in debug builds when a header is freed) */
#define FSEG_MAGIC_N_VALUE 97937874
/* Tuning constants for allocating extents and pages to a segment */
#define FSEG_FILLFACTOR 8 /* If this value is x, then if
the number of unused but reserved
pages in a segment is less than
reserved pages * 1/x, and there are
at least FSEG_FRAG_LIMIT used pages,
then we allow a new empty extent to
be added to the segment in
fseg_alloc_free_page. Otherwise, we
use unused pages of the segment. */
#define FSEG_FRAG_LIMIT FSEG_FRAG_ARR_N_SLOTS
/* If the segment has >= this many
used pages, it may be expanded by
allocating extents to the segment;
until that only individual fragment
pages are allocated from the space */
#define FSEG_FREE_LIST_LIMIT 40 /* If the reserved size of a segment
is at least this many extents, we
allow extents to be put to the free
list of the extent: at most
FSEG_FREE_LIST_MAX_LEN many */
#define FSEG_FREE_LIST_MAX_LEN 4
/* EXTENT DESCRIPTOR
=================
File extent descriptor data structure: contains bits to tell
which pages in the extent are free and which contain old tuple
version to clean.
The XDES_... field offsets below are relative to the start of one
descriptor. */
/*-------------------------------------*/
#define XDES_ID 0 /* The identifier of the segment
to which this extent belongs */
#define XDES_FLST_NODE 8 /* The list node data structure
for the descriptors */
#define XDES_STATE (FLST_NODE_SIZE + 8)
/* contains state information
of the extent: 4 bytes holding one
of the XDES_FREE ... XDES_FSEG
values below */
#define XDES_BITMAP (FLST_NODE_SIZE + 12)
/* Descriptor bitmap of the pages
in the extent*/
/*-------------------------------------*/
#define XDES_BITS_PER_PAGE 2 /* How many bits are there per page */
#define XDES_FREE_BIT 0 /* Index of the bit which tells if
the page is free */
#define XDES_CLEAN_BIT 1 /* Index of the bit which tells if
there are old versions of tuples
on the page */
/* States of a descriptor */
#define XDES_FREE 1 /* extent is in free list of space */
#define XDES_FREE_FRAG 2 /* extent is in free fragment list of
space */
#define XDES_FULL_FRAG 3 /* extent is in full fragment list of
space */
#define XDES_FSEG 4 /* extent belongs to a segment*/
/* Number of pages described in a single descriptor page:
currently each page description takes less than
1 byte. The descriptor page of a page is found by rounding the page
offset down to a multiple of this value. */
#define XDES_DESCRIBED_PER_PAGE UNIV_PAGE_SIZE
/* File extent data structure size in bytes. The "+ 7 ) / 8"
part in the definition rounds the number of bytes upward. */
#define XDES_SIZE (XDES_BITMAP +\
(FSP_EXTENT_SIZE * XDES_BITS_PER_PAGE + 7) / 8)
/* Offset of the descriptor array on a descriptor page: the array starts
right after the space reserved for the file space header */
#define XDES_ARR_OFFSET (FSP_HEADER_OFFSET + FSP_HEADER_SIZE)
/**************************************************************************
Returns an extent to the free list of a space. Forward declaration: the
function is called before its definition in this file. */
static
void
fsp_free_extent(
/*============*/
ulint space, /* in: space id */
ulint page, /* in: page offset in the extent */
mtr_t* mtr); /* in: mtr */
/**************************************************************************
Frees an extent of a segment to the space free list. Forward declaration. */
static
void
fseg_free_extent(
/*=============*/
fseg_header_t* seg_header, /* in: segment header */
ulint space, /* in: space id */
ulint page, /* in: page offset in the extent */
mtr_t* mtr); /* in: mtr handle */
/**************************************************************************
Calculates the number of pages reserved by a segment, and how
many pages are currently used. Forward declaration. */
static
ulint
fseg_n_reserved_pages_low(
/*======================*/
/* out: number of reserved pages */
fseg_header_t* header, /* in: segment header */
ulint* used, /* out: number of pages used (<= reserved) */
mtr_t* mtr); /* in: mtr handle */
/************************************************************************
Marks a page used. The page must reside within the extents of the given
segment. Forward declaration. */
static
void
fseg_mark_page_used(
/*================*/
fseg_header_t* seg_header,/* in: segment header */
ulint space, /* in: space id */
ulint page, /* in: page offset */
mtr_t* mtr); /* in: mtr */
/**************************************************************************
Frees a single page of a segment. Forward declaration. */
static
void
fseg_free_page_low(
/*===============*/
fseg_header_t* seg_header, /* in: segment header */
ulint space, /* in: space id */
ulint page, /* in: page offset */
mtr_t* mtr); /* in: mtr handle */
/**************************************************************************
Returns the first extent descriptor for a segment. We think of the extent
lists of the segment catenated in the order FSEG_FULL -> FSEG_NOT_FULL
-> FSEG_FREE. Forward declaration. */
static
xdes_t*
fseg_get_first_extent(
/*==================*/
/* out: the first extent descriptor, or NULL if
none */
fseg_header_t* header, /* in: segment header */
mtr_t* mtr); /* in: mtr */
/**************************************************************************
Puts new extents to the free list if
there are free extents above the free limit. If an extent happens
to contain an extent descriptor page, the extent is put to
the FSP_FREE_FRAG list with the page marked as used. Forward declaration:
the function is called by xdes_get_descriptor_with_space_hdr before its
definition in this file. */
static
void
fsp_fill_free_list(
/*===============*/
ulint space, /* in: space */
fsp_header_t* header, /* in: space header */
mtr_t* mtr); /* in: mtr */
/**************************************************************************
Reads one bit from the per-page bitmap of an extent descriptor. */
UNIV_INLINE
bool
xdes_get_bit(
/*=========*/
			/* out: value of the requested bit; for
			XDES_FREE_BIT, TRUE means that the page is free */
	xdes_t*	descr,	/* in: descriptor */
	ulint	bit,	/* in: XDES_FREE_BIT or XDES_CLEAN_BIT */
	ulint	offset,	/* in: page offset within extent:
			0 ... FSP_EXTENT_SIZE - 1 */
	mtr_t*	mtr)	/* in: mtr */
{
	ulint	bit_pos;	/* position of the bit in the whole bitmap */
	ulint	bitmap_byte;	/* value of the bitmap byte holding the bit */

	ut_ad(mtr_memo_contains(mtr, buf_block_align(descr),
						MTR_MEMO_PAGE_X_LOCK));
	ut_ad((bit == XDES_FREE_BIT) || (bit == XDES_CLEAN_BIT));
	ut_ad(offset < FSP_EXTENT_SIZE);

	/* Each page owns XDES_BITS_PER_PAGE consecutive bits */
	bit_pos = bit + XDES_BITS_PER_PAGE * offset;

	bitmap_byte = mtr_read_ulint(descr + XDES_BITMAP + bit_pos / 8,
							MLOG_1BYTE, mtr);

	return(ut_bit_get_nth(bitmap_byte, bit_pos % 8));
}
/**************************************************************************
Writes one bit of the per-page bitmap of an extent descriptor. The byte
holding the bit is read, modified and written back through the mtr log. */
UNIV_INLINE
void
xdes_set_bit(
/*=========*/
	xdes_t*	descr,	/* in: descriptor */
	ulint	bit,	/* in: XDES_FREE_BIT or XDES_CLEAN_BIT */
	ulint	offset,	/* in: page offset within extent:
			0 ... FSP_EXTENT_SIZE - 1 */
	bool	val,	/* in: bit value */
	mtr_t*	mtr)	/* in: mtr */
{
	ulint	bit_pos;	/* position of the bit in the whole bitmap */
	xdes_t*	byte_ptr;	/* address of the bitmap byte holding it */
	ulint	new_byte;

	ut_ad(mtr_memo_contains(mtr, buf_block_align(descr),
						MTR_MEMO_PAGE_X_LOCK));
	ut_ad((bit == XDES_FREE_BIT) || (bit == XDES_CLEAN_BIT));
	ut_ad(offset < FSP_EXTENT_SIZE);

	bit_pos = bit + XDES_BITS_PER_PAGE * offset;
	byte_ptr = descr + XDES_BITMAP + bit_pos / 8;

	new_byte = ut_bit_set_nth(mtr_read_ulint(byte_ptr, MLOG_1BYTE, mtr),
							bit_pos % 8, val);

	mlog_write_ulint(byte_ptr, new_byte, MLOG_1BYTE, mtr);
}
/**************************************************************************
Searches an extent descriptor for a page whose given bit has the wanted
value. The scan starts at hint and goes upward to the end of the extent,
then wraps around and covers the pages below hint. */
UNIV_INLINE
ulint
xdes_find_bit(
/*==========*/
			/* out: page offset within the extent of the
			first match, ULINT_UNDEFINED if not found */
	xdes_t*	descr,	/* in: descriptor */
	ulint	bit,	/* in: XDES_FREE_BIT or XDES_CLEAN_BIT */
	bool	val,	/* in: desired bit value */
	ulint	hint,	/* in: hint of which bit position would be
			desirable */
	mtr_t*	mtr)	/* in: mtr */
{
	ulint	pos;

	ut_ad(descr && mtr);
	ut_ad(val <= TRUE);
	ut_ad(hint < FSP_EXTENT_SIZE);
	ut_ad(mtr_memo_contains(mtr, buf_block_align(descr),
						MTR_MEMO_PAGE_X_LOCK));

	/* First pass: hint ... end of the extent */
	pos = hint;

	while (pos < FSP_EXTENT_SIZE) {
		if (xdes_get_bit(descr, bit, pos, mtr) == val) {

			return(pos);
		}

		pos++;
	}

	/* Second pass: start of the extent ... hint - 1 */
	pos = 0;

	while (pos < hint) {
		if (xdes_get_bit(descr, bit, pos, mtr) == val) {

			return(pos);
		}

		pos++;
	}

	return(ULINT_UNDEFINED);
}
/**************************************************************************
Searches an extent descriptor for a page whose given bit has the wanted
value, scanning in the direction opposite to xdes_find_bit: from hint
downward to the start of the extent, then from the end of the extent
downward to hint + 1. */
UNIV_INLINE
ulint
xdes_find_bit_downward(
/*===================*/
			/* out: page offset within the extent of the
			first match, ULINT_UNDEFINED if not found */
	xdes_t*	descr,	/* in: descriptor */
	ulint	bit,	/* in: XDES_FREE_BIT or XDES_CLEAN_BIT */
	bool	val,	/* in: desired bit value */
	ulint	hint,	/* in: hint of which bit position would be
			desirable */
	mtr_t*	mtr)	/* in: mtr */
{
	ulint	pos;

	ut_ad(descr && mtr);
	ut_ad(val <= TRUE);
	ut_ad(hint < FSP_EXTENT_SIZE);
	ut_ad(mtr_memo_contains(mtr, buf_block_align(descr),
						MTR_MEMO_PAGE_X_LOCK));

	/* First pass: hint ... 0. pos is kept one above the position under
	test so that the unsigned counter never has to go below zero. */
	pos = hint + 1;

	while (pos > 0) {
		pos--;

		if (xdes_get_bit(descr, bit, pos, mtr) == val) {

			return(pos);
		}
	}

	/* Second pass: end of the extent ... hint + 1 */
	pos = FSP_EXTENT_SIZE - 1;

	while (pos > hint) {
		if (xdes_get_bit(descr, bit, pos, mtr) == val) {

			return(pos);
		}

		pos--;
	}

	return(ULINT_UNDEFINED);
}
/**************************************************************************
Counts the pages of an extent whose XDES_FREE_BIT is not set. */
UNIV_INLINE
ulint
xdes_get_n_used(
/*============*/
			/* out: number of pages used */
	xdes_t*	descr,	/* in: descriptor */
	mtr_t*	mtr)	/* in: mtr */
{
	ulint	n_used;
	ulint	page;

	ut_ad(descr && mtr);
	ut_ad(mtr_memo_contains(mtr, buf_block_align(descr),
						MTR_MEMO_PAGE_X_LOCK));
	n_used = 0;
	page = 0;

	while (page < FSP_EXTENT_SIZE) {
		if (xdes_get_bit(descr, XDES_FREE_BIT, page, mtr) == FALSE) {
			n_used++;
		}

		page++;
	}

	return(n_used);
}
/**************************************************************************
Tells whether none of the pages of an extent is in use. */
UNIV_INLINE
bool
xdes_is_free(
/*=========*/
			/* out: TRUE if totally free */
	xdes_t*	descr,	/* in: descriptor */
	mtr_t*	mtr)	/* in: mtr */
{
	return((xdes_get_n_used(descr, mtr) == 0) ? TRUE : FALSE);
}
/**************************************************************************
Tells whether every page of an extent is in use. */
UNIV_INLINE
bool
xdes_is_full(
/*=========*/
			/* out: TRUE if full */
	xdes_t*	descr,	/* in: descriptor */
	mtr_t*	mtr)	/* in: mtr */
{
	return((xdes_get_n_used(descr, mtr) == FSP_EXTENT_SIZE)
							? TRUE : FALSE);
}
/**************************************************************************
Writes the state field of an extent descriptor through the mtr log. */
UNIV_INLINE
void
xdes_set_state(
/*===========*/
	xdes_t*	descr,	/* in: descriptor */
	ulint	state,	/* in: state to set: XDES_FREE ... XDES_FSEG */
	mtr_t*	mtr)	/* in: mtr handle */
{
	xdes_t*	state_field;

	ut_ad(descr && mtr);
	ut_ad(state >= XDES_FREE);
	ut_ad(state <= XDES_FSEG);
	ut_ad(mtr_memo_contains(mtr, buf_block_align(descr),
						MTR_MEMO_PAGE_X_LOCK));

	state_field = descr + XDES_STATE;

	mlog_write_ulint(state_field, state, MLOG_4BYTES, mtr);
}
/**************************************************************************
Reads the state field of an extent descriptor. */
UNIV_INLINE
ulint
xdes_get_state(
/*===========*/
			/* out: state */
	xdes_t*	descr,	/* in: descriptor */
	mtr_t*	mtr)	/* in: mtr handle */
{
	ulint	state;

	ut_ad(descr && mtr);
	ut_ad(mtr_memo_contains(mtr, buf_block_align(descr),
						MTR_MEMO_PAGE_X_LOCK));

	state = mtr_read_ulint(descr + XDES_STATE, MLOG_4BYTES, mtr);

	return(state);
}
/**************************************************************************
Initializes an extent descriptor: sets both the free bit and the clean bit
of every page, and then sets the extent state to XDES_FREE. */
UNIV_INLINE
void
xdes_init(
/*======*/
	xdes_t*	descr,	/* in: descriptor */
	mtr_t*	mtr)	/* in: mtr */
{
	ulint	page;

	ut_ad(descr && mtr);
	ut_ad(mtr_memo_contains(mtr, buf_block_align(descr),
						MTR_MEMO_PAGE_X_LOCK));
	page = 0;

	while (page < FSP_EXTENT_SIZE) {
		xdes_set_bit(descr, XDES_FREE_BIT, page, TRUE, mtr);
		xdes_set_bit(descr, XDES_CLEAN_BIT, page, TRUE, mtr);

		page++;
	}

	xdes_set_state(descr, XDES_FREE, mtr);
}
/************************************************************************
Calculates the page on which the extent descriptor of a page resides: the
page offset rounded down to a multiple of XDES_DESCRIBED_PER_PAGE. */
UNIV_INLINE
ulint
xdes_calc_descriptor_page(
/*======================*/
				/* out: descriptor page offset */
	ulint	offset)		/* in: page offset */
{
	ulint	descr_page_no;

	/* The descriptors of all the extents described by one descriptor
	page must fit on that page */
	ut_ad(UNIV_PAGE_SIZE > XDES_ARR_OFFSET
		+ (XDES_DESCRIBED_PER_PAGE / FSP_EXTENT_SIZE) * XDES_SIZE);

	descr_page_no = ut_2pow_round(offset, XDES_DESCRIBED_PER_PAGE);

	return(descr_page_no);
}
/************************************************************************
Calculates the index, within the descriptor array of a descriptor page, of
the extent descriptor which describes a page. */
UNIV_INLINE
ulint
xdes_calc_descriptor_index(
/*=======================*/
				/* out: descriptor index */
	ulint	offset)		/* in: page offset */
{
	ulint	offset_in_described_range;

	offset_in_described_range = ut_2pow_remainder(offset,
						XDES_DESCRIBED_PER_PAGE);

	return(offset_in_described_range / FSP_EXTENT_SIZE);
}
/************************************************************************
Gets a pointer to the extent descriptor of a page, given the space header.
The page where the extent descriptor resides is x-locked. If the page
offset is equal to the free limit of the space, fsp_fill_free_list is
called first to add new extents from above the free limit to the space
free list: descriptors above the free limit are uninitialized, so this is
needed to make the requested descriptor defined. */
UNIV_INLINE
xdes_t*
xdes_get_descriptor_with_space_hdr(
/*===============================*/
				/* out: pointer to the extent descriptor,
				NULL if the page does not exist in the
				space or if offset > free limit */
	fsp_header_t*	sp_header,/* in: space header, x-latched */
	ulint		space,	/* in: space id */
	ulint		offset,	/* in: page offset;
				if equal to the free limit,
				we try to add new extents to
				the space free list */
	mtr_t*		mtr)	/* in: mtr handle */
{
	ulint		free_limit;
	ulint		space_size;
	ulint		descr_page_no;
	buf_block_t*	descr_block;
	page_t*		descr_frame;

	ut_ad(mtr);
	ut_ad(mtr_memo_contains(mtr, &fsp_latch, MTR_MEMO_X_LOCK));

	free_limit = mtr_read_ulint(sp_header + FSP_FREE_LIMIT, MLOG_4BYTES,
									mtr);
	space_size = mtr_read_ulint(sp_header + FSP_SIZE, MLOG_4BYTES, mtr);

	if ((offset >= space_size) || (offset > free_limit)) {
		/* Outside the space, or above the initialized part */

		return(NULL);
	}

	if (offset == free_limit) {
		fsp_fill_free_list(space, sp_header, mtr);
	}

	descr_page_no = xdes_calc_descriptor_page(offset);

	if (descr_page_no != 0) {
		descr_block = buf_page_get(space, descr_page_no, mtr);
		buf_page_x_lock(descr_block, mtr);
		descr_frame = buf_block_get_frame(descr_block);
	} else {
		/* The descriptor is on the space header page itself */
		descr_frame = buf_frame_align(sp_header);
	}

	return(descr_frame + XDES_ARR_OFFSET
		+ XDES_SIZE * xdes_calc_descriptor_index(offset));
}
/************************************************************************
Gets a pointer to the extent descriptor of a page. Fetches and x-locks
page 0 of the space to obtain the space header, and then delegates to
xdes_get_descriptor_with_space_hdr; see that function for the handling of
an offset equal to the free limit. */
static
xdes_t*
xdes_get_descriptor(
/*================*/
			/* out: pointer to the extent descriptor,
			NULL if the page does not exist in the
			space or if offset > free limit */
	ulint	space,	/* in: space id */
	ulint	offset,	/* in: page offset;
			if equal to the free limit,
			we try to add new extents to
			the space free list */
	mtr_t*	mtr)	/* in: mtr handle */
{
	buf_block_t*	hdr_block;
	fsp_header_t*	hdr;

	/* The space header lives on page 0 of the space */
	hdr_block = buf_page_get(space, 0, mtr);
	hdr = buf_block_get_frame(hdr_block) + FSP_HEADER_OFFSET;

	buf_page_x_lock(hdr_block, mtr);

	return(xdes_get_descriptor_with_space_hdr(hdr, space, offset, mtr));
}
/************************************************************************
Gets a pointer to an extent descriptor when the file address of the list
node embedded in the descriptor is known. The page where the extent
descriptor resides is x-locked. */
UNIV_INLINE
xdes_t*
xdes_lst_get_descriptor(
/*====================*/
				/* out: pointer to the extent descriptor */
	ulint		space,	/* in: space id */
	fil_addr_t	lst_node,/* in: file address of the list node
				contained in the descriptor */
	mtr_t*		mtr)	/* in: mtr handle */
{
	ut_ad(mtr);
	ut_ad(mtr_memo_contains(mtr, &fsp_latch, MTR_MEMO_X_LOCK));

	/* The list node sits XDES_FLST_NODE bytes into the descriptor */
	return(fut_get_ptr_x_lock(space, lst_node, mtr) - XDES_FLST_NODE);
}
/************************************************************************
Gets a pointer to the descriptor which follows the given one in its
descriptor list, and x-locks the page of that descriptor. */
UNIV_INLINE
xdes_t*
xdes_lst_get_next(
/*==============*/
	xdes_t*	descr,	/* in: pointer to a descriptor */
	mtr_t*	mtr)	/* in: mtr handle */
{
	ulint		space_id;
	fil_addr_t	next_addr;

	ut_ad(mtr && descr);

	space_id = buf_page_get_space(buf_block_align(descr));
	next_addr = flst_get_next_addr(descr + XDES_FLST_NODE, mtr);

	return(xdes_lst_get_descriptor(space_id, next_addr, mtr));
}
/************************************************************************
Returns the page offset of the first page in the extent described by a
descriptor: the offset of the descriptor page plus FSP_EXTENT_SIZE times
the index of the descriptor in the descriptor array of that page. */
UNIV_INLINE
ulint
xdes_get_offset(
/*============*/
			/* out: offset of the first page in extent */
	xdes_t*	descr)	/* in: extent descriptor */
{
	ut_ad(descr);

	return(buf_page_get_offset(buf_block_align(descr))
		+ FSP_EXTENT_SIZE
		* ((descr - buf_frame_align(descr) - XDES_ARR_OFFSET)
							/ XDES_SIZE));
}
/**************************************************************************
Fetches page 0 of a space, x-locks it, and returns a pointer to the space
header stored on it. */
UNIV_INLINE
fsp_header_t*
fsp_get_space_header(
/*=================*/
			/* out: pointer to the space header, page x-locked */
	ulint	id,	/* in: space id */
	mtr_t*	mtr)	/* in: mtr */
{
	buf_block_t*	hdr_block;
	fsp_header_t*	hdr;

	ut_ad(mtr);

	hdr_block = buf_page_get(id, 0, mtr);
	buf_page_x_lock(hdr_block, mtr);

	hdr = buf_block_get_frame(hdr_block) + FSP_HEADER_OFFSET;

	return(hdr);
}
/**************************************************************************
Initializes the file space system by creating the rw-latch fsp_latch. */
void
fsp_init(void)
/*==========*/
{
	rw_lock_create(&fsp_latch);
}
/**************************************************************************
Initializes the space header of a newly created space: acquires fsp_latch
in x-mode, stores the size, zeroes the free limit, the lowest-no-write
offset and the fragment page counter, initializes the five lists as empty,
and sets the segment id counter to 1. */
void
fsp_header_init(
/*============*/
	ulint	space,	/* in: space id */
	ulint	size,	/* in: current size in blocks */
	mtr_t*	mtr)	/* in: mini-transaction handle */
{
	fsp_header_t*	hdr;

	ut_ad(mtr);

	mtr_x_lock(&fsp_latch, mtr);

	hdr = fsp_get_space_header(space, mtr);

	/* Scalar fields */
	mlog_write_ulint(hdr + FSP_SIZE, size, MLOG_4BYTES, mtr);
	mlog_write_ulint(hdr + FSP_FREE_LIMIT, 0, MLOG_4BYTES, mtr);
	mlog_write_ulint(hdr + FSP_LOWEST_NO_WRITE, 0, MLOG_4BYTES, mtr);
	mlog_write_ulint(hdr + FSP_FRAG_N_USED, 0, MLOG_4BYTES, mtr);

	/* Extent lists and segment header page lists */
	flst_init(hdr + FSP_FREE, mtr);
	flst_init(hdr + FSP_FREE_FRAG, mtr);
	flst_init(hdr + FSP_FULL_FRAG, mtr);
	flst_init(hdr + FSP_SEG_HDRS_FULL, mtr);
	flst_init(hdr + FSP_SEG_HDRS_FREE, mtr);

	/* Segment id counter */
	mlog_write_dulint(hdr + FSP_SEG_ID, ut_dulint_create(0, 1),
							MLOG_8BYTES, mtr);
}
/**************************************************************************
Adds size_inc pages to the size field in the header of a space. Acquires
fsp_latch in x-mode. */
void
fsp_header_inc_size(
/*================*/
	ulint	space,	/* in: space id */
	ulint	size_inc,/* in: size increment in pages */
	mtr_t*	mtr)	/* in: mini-transaction handle */
{
	fsp_header_t*	hdr;
	ulint		old_size;

	ut_ad(mtr);

	mtr_x_lock(&fsp_latch, mtr);

	hdr = fsp_get_space_header(space, mtr);

	old_size = mtr_read_ulint(hdr + FSP_SIZE, MLOG_4BYTES, mtr);

	mlog_write_ulint(hdr + FSP_SIZE, old_size + size_inc, MLOG_4BYTES,
									mtr);
}
/**************************************************************************
Initializes extents above the free limit and puts them to the space lists,
for as long as whole extents fit below the space size and fewer than
FSP_FREE_ADD extents have been added to the FSP_FREE list. An extent whose
first page is an extent descriptor page goes to the FSP_FREE_FRAG list
instead, with that page marked as used; such extents do not count towards
FSP_FREE_ADD. */
static
void
fsp_fill_free_list(
/*===============*/
	ulint		space,	/* in: space */
	fsp_header_t*	header,	/* in: space header */
	mtr_t*		mtr)	/* in: mtr */
{
	ulint	space_size;
	ulint	page_no;	/* first page of the extent being added */
	ulint	n_added_free;	/* extents put to FSP_FREE so far */
	ulint	n_frag_used;
	xdes_t*	descr;

	ut_ad(header && mtr);

	space_size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr);
	page_no = mtr_read_ulint(header + FSP_FREE_LIMIT, MLOG_4BYTES, mtr);
	n_added_free = 0;

	for (; (page_no + FSP_EXTENT_SIZE <= space_size)
			&& (n_added_free < FSP_FREE_ADD);
					page_no += FSP_EXTENT_SIZE) {

		/* Advance the free limit before fetching the descriptor:
		the lookup below then sees page_no below the limit, so it
		neither returns NULL nor calls this function again */
		mlog_write_ulint(header + FSP_FREE_LIMIT,
				page_no + FSP_EXTENT_SIZE, MLOG_4BYTES, mtr);

		descr = xdes_get_descriptor_with_space_hdr(header, space,
								page_no, mtr);
		xdes_init(descr, mtr);

		ut_ad(XDES_DESCRIBED_PER_PAGE % FSP_EXTENT_SIZE == 0);

		if (0 != page_no % XDES_DESCRIBED_PER_PAGE) {
			/* An ordinary extent: totally free */
			flst_add_last(header + FSP_FREE,
						descr + XDES_FLST_NODE, mtr);
			n_added_free++;

			continue;
		}

		/* The first page in the extent is a descriptor page:
		mark it used and account for it in the fragment counter */
		xdes_set_bit(descr, XDES_FREE_BIT, 0, FALSE, mtr);
		xdes_set_state(descr, XDES_FREE_FRAG, mtr);

		flst_add_last(header + FSP_FREE_FRAG,
						descr + XDES_FLST_NODE, mtr);

		n_frag_used = mtr_read_ulint(header + FSP_FRAG_N_USED,
							MLOG_4BYTES, mtr);
		mlog_write_ulint(header + FSP_FRAG_N_USED, n_frag_used + 1,
							MLOG_4BYTES, mtr);
	}
}
/**************************************************************************
Allocates a free extent: takes the hinted extent if its state is XDES_FREE,
otherwise the first extent of the FSP_FREE list, refilling that list from
above the free limit if it is empty. The extent is removed from the
FSP_FREE list; its state field is not changed here. */
static
xdes_t*
fsp_alloc_free_extent(
/*==================*/
			/* out: extent descriptor, NULL if cannot
			be allocated */
	ulint	space,	/* in: space id */
	ulint	hint,	/* in: hint of which extent would be
			desirable: any page offset in the extent
			goes; the hint must not be > FSP_FREE_LIMIT */
	mtr_t*	mtr)	/* in: mtr */
{
	fsp_header_t*	hdr;
	xdes_t*		descr;
	fil_addr_t	first_free;

	ut_ad(mtr);

	hdr = fsp_get_space_header(space, mtr);

	descr = xdes_get_descriptor_with_space_hdr(hdr, space, hint, mtr);

	if ((descr == NULL) || (xdes_get_state(descr, mtr) != XDES_FREE)) {
		/* The hinted extent cannot be used: take the first
		extent in the free list */
		first_free = flst_get_first(hdr + FSP_FREE, mtr);

		if (fil_addr_is_null(first_free)) {
			fsp_fill_free_list(space, hdr, mtr);

			first_free = flst_get_first(hdr + FSP_FREE, mtr);
		}

		if (fil_addr_is_null(first_free)) {

			return(NULL);	/* No free extents left */
		}

		descr = xdes_lst_get_descriptor(space, first_free, mtr);
	}

	flst_remove(hdr + FSP_FREE, descr + XDES_FLST_NODE, mtr);

	return(descr);
}
/**************************************************************************
Allocates a single free page from the fragment extents of a space and marks
it as used. The page is taken from the hinted extent if that extent is in
state XDES_FREE_FRAG, otherwise from the first extent of the FSP_FREE_FRAG
list; if that list is empty, a free extent is allocated and added to it.
The FSP_FRAG_N_USED counter is kept up to date, and an extent that becomes
full is moved to the FSP_FULL_FRAG list. */
static
ulint
fsp_alloc_free_page(
/*================*/
			/* out: the page offset, FIL_NULL
			if no page could be allocated */
	ulint	space,	/* in: space id */
	ulint	hint,	/* in: hint of which page would be desirable */
	mtr_t*	mtr)	/* in: mtr handle */
{
	fsp_header_t*	hdr;
	xdes_t*		descr;
	fil_addr_t	first_frag;
	ulint		free_idx;	/* offset of the page in its extent */
	ulint		n_frag_used;

	ut_ad(mtr);

	hdr = fsp_get_space_header(space, mtr);

	/* Try the hinted extent first */
	descr = xdes_get_descriptor_with_space_hdr(hdr, space, hint, mtr);

	if ((descr == NULL)
	    || (xdes_get_state(descr, mtr) != XDES_FREE_FRAG)) {

		/* The hinted extent cannot be used: take the first extent
		in the free_frag list */
		first_frag = flst_get_first(hdr + FSP_FREE_FRAG, mtr);

		if (!fil_addr_is_null(first_frag)) {
			descr = xdes_lst_get_descriptor(space, first_frag,
									mtr);
		} else {
			/* There are no partially full fragments: allocate
			a free extent and add it to the FREE_FRAG
			list. NOTE that the allocation may have as a
			side-effect that an extent containing a descriptor
			page is added to the FREE_FRAG list. But we will
			allocate our page from the allocated free extent. */
			descr = fsp_alloc_free_extent(space, hint, mtr);

			if (descr == NULL) {
				/* No free space left */

				return(FIL_NULL);
			}

			xdes_set_state(descr, XDES_FREE_FRAG, mtr);
			flst_add_last(hdr + FSP_FREE_FRAG,
						descr + XDES_FLST_NODE, mtr);
		}

		/* The hint does not apply to the extent we ended up with */
		hint = 0;
	}

	/* Now we have in descr an extent with at least one free page.
	Look for a free page in the extent. */
	free_idx = xdes_find_bit(descr, XDES_FREE_BIT, TRUE,
						hint % FSP_EXTENT_SIZE, mtr);
	ut_a(free_idx != ULINT_UNDEFINED);

	xdes_set_bit(descr, XDES_FREE_BIT, free_idx, FALSE, mtr);

	/* One more used page in the FREE_FRAG list */
	n_frag_used = 1 + mtr_read_ulint(hdr + FSP_FRAG_N_USED, MLOG_4BYTES,
									mtr);
	mlog_write_ulint(hdr + FSP_FRAG_N_USED, n_frag_used, MLOG_4BYTES,
									mtr);

	if (xdes_is_full(descr, mtr)) {
		/* The fragment is full: move it to the FULL_FRAG list and
		take its pages out of the counter, which covers only the
		FREE_FRAG list */
		flst_remove(hdr + FSP_FREE_FRAG, descr + XDES_FLST_NODE, mtr);
		xdes_set_state(descr, XDES_FULL_FRAG, mtr);

		flst_add_last(hdr + FSP_FULL_FRAG, descr + XDES_FLST_NODE,
									mtr);
		mlog_write_ulint(hdr + FSP_FRAG_N_USED,
					n_frag_used - FSP_EXTENT_SIZE,
					MLOG_4BYTES, mtr);
	}

	return(xdes_get_offset(descr) + free_idx);
}
/**************************************************************************
Frees a single page which was allocated from the fragment extents of a
space. The page is marked as free and clean, the FSP_FRAG_N_USED counter is
adjusted, an extent that was full is moved back to the FSP_FREE_FRAG list,
and an extent that becomes totally free is returned to the space free
list. */
static
void
fsp_free_page(
/*==========*/
	ulint	space,	/* in: space id */
	ulint	page,	/* in: page offset */
	mtr_t*	mtr)	/* in: mtr handle */
{
	fsp_header_t*	hdr;
	xdes_t*		descr;
	ulint		old_state;
	ulint		n_frag_used;
	ulint		page_in_extent;

	ut_ad(mtr);

	hdr = fsp_get_space_header(space, mtr);

	descr = xdes_get_descriptor_with_space_hdr(hdr, space, page, mtr);

	old_state = xdes_get_state(descr, mtr);

	/* The page must belong to a fragment extent and be in use */
	ut_a((old_state == XDES_FREE_FRAG) || (old_state == XDES_FULL_FRAG));

	page_in_extent = page % FSP_EXTENT_SIZE;

	ut_a(xdes_get_bit(descr, XDES_FREE_BIT, page_in_extent, mtr)
								== FALSE);

	xdes_set_bit(descr, XDES_FREE_BIT, page_in_extent, TRUE, mtr);
	xdes_set_bit(descr, XDES_CLEAN_BIT, page_in_extent, TRUE, mtr);

	n_frag_used = mtr_read_ulint(hdr + FSP_FRAG_N_USED, MLOG_4BYTES,
									mtr);

	if (old_state != XDES_FULL_FRAG) {
		ut_a(n_frag_used > 0);

		mlog_write_ulint(hdr + FSP_FRAG_N_USED, n_frag_used - 1,
							MLOG_4BYTES, mtr);
	} else {
		/* The fragment was full: move it to the FREE_FRAG list.
		Its remaining used pages now count in FSP_FRAG_N_USED. */
		flst_remove(hdr + FSP_FULL_FRAG, descr + XDES_FLST_NODE, mtr);
		xdes_set_state(descr, XDES_FREE_FRAG, mtr);

		flst_add_last(hdr + FSP_FREE_FRAG, descr + XDES_FLST_NODE,
									mtr);
		mlog_write_ulint(hdr + FSP_FRAG_N_USED,
					n_frag_used + FSP_EXTENT_SIZE - 1,
					MLOG_4BYTES, mtr);
	}

	if (xdes_is_free(descr, mtr)) {
		/* The extent has become free: return it to the space */
		flst_remove(hdr + FSP_FREE_FRAG, descr + XDES_FLST_NODE, mtr);

		fsp_free_extent(space, page, mtr);
	}
}
/**************************************************************************
Returns an extent to the free list of a space: re-initializes its
descriptor and appends it to the FSP_FREE list. The extent must not already
be in state XDES_FREE. This function does not remove the descriptor from
any list it was on before. */
static
void
fsp_free_extent(
/*============*/
	ulint	space,	/* in: space id */
	ulint	page,	/* in: page offset in the extent */
	mtr_t*	mtr)	/* in: mtr */
{
	fsp_header_t*	hdr;
	xdes_t*		descr;

	ut_ad(mtr);

	hdr = fsp_get_space_header(space, mtr);

	descr = xdes_get_descriptor_with_space_hdr(hdr, space, page, mtr);

	ut_a(xdes_get_state(descr, mtr) != XDES_FREE);

	xdes_init(descr, mtr);

	flst_add_last(hdr + FSP_FREE, descr + XDES_FLST_NODE, mtr);
}
/**************************************************************************
Returns a pointer to the segment header at a given index in the header
array of a segment header page. */
UNIV_INLINE
fseg_header_t*
fsp_seg_hdr_page_get_nth_hdr(
/*=========================*/
			/* out: segment header */
	page_t*	page,	/* in: segment header page */
	ulint	i,	/* in: index of the header on the page:
			0 ... FSP_SEG_HDRS_PER_PAGE - 1 */
	mtr_t*	mtr)	/* in: mini-transaction handle */
{
	ulint	offset_on_page;

	ut_ad(i < FSP_SEG_HDRS_PER_PAGE);
	ut_ad(mtr_memo_contains(mtr, page, MTR_MEMO_PAGE_X_LOCK));

	offset_on_page = FSEG_ARR_OFFSET + FSEG_HEADER_SIZE * i;

	return(page + offset_on_page);
}
/**************************************************************************
Looks for the first used segment header on a segment header page. A header
is in use if its FSEG_ID field is not ut_dulint_zero. */
static
ulint
fsp_seg_hdr_page_find_used(
/*=======================*/
			/* out: segment header index, or ULINT_UNDEFINED
			if not found */
	page_t*	page,	/* in: segment header page */
	mtr_t*	mtr)	/* in: mini-transaction handle */
{
	fseg_header_t*	hdr;
	ulint		n;

	n = 0;

	while (n < FSP_SEG_HDRS_PER_PAGE) {
		hdr = fsp_seg_hdr_page_get_nth_hdr(page, n, mtr);

		if (0 != ut_dulint_cmp(mach_read_from_8(hdr + FSEG_ID),
							ut_dulint_zero)) {
			/* This is used */

			return(n);
		}

		n++;
	}

	return(ULINT_UNDEFINED);
}
/**************************************************************************
Looks for an unused segment header on a segment header page, starting from
a given index and searching forward. A header is unused if its FSEG_ID
field is ut_dulint_zero. */
static
ulint
fsp_seg_hdr_page_find_free(
/*=======================*/
			/* out: segment header index, or ULINT_UNDEFINED
			if not found */
	page_t*	page,	/* in: segment header page */
	ulint	j,	/* in: search forward starting from this index */
	mtr_t*	mtr)	/* in: mini-transaction handle */
{
	fseg_header_t*	hdr;
	ulint		n;

	n = j;

	while (n < FSP_SEG_HDRS_PER_PAGE) {
		hdr = fsp_seg_hdr_page_get_nth_hdr(page, n, mtr);

		if (0 == ut_dulint_cmp(mach_read_from_8(hdr + FSEG_ID),
							ut_dulint_zero)) {
			/* This is unused */

			return(n);
		}

		n++;
	}

	return(ULINT_UNDEFINED);
}
/**************************************************************************
Allocates a new segment header page from the space: marks every header
slot on the page unused by zeroing its FSEG_ID, and appends the page to
the FSP_SEG_HDRS_FREE list. */
static
bool
fsp_alloc_seg_hdr_page(
/*===================*/
					/* out: TRUE if could be allocated */
	fsp_header_t*	space_header,	/* in: space header */
	mtr_t*		mtr)		/* in: mini-transaction handle */
{
	ulint		space_id;
	ulint		new_page_no;
	buf_block_t*	new_block;
	page_t*		new_page;
	fseg_header_t*	slot;
	ulint		n;

	space_id = buf_frame_get_space(space_header);

	new_page_no = fsp_alloc_free_page(space_id, 0, mtr);

	if (new_page_no == FIL_NULL) {

		return(FALSE);
	}

	new_block = buf_page_get(space_id, new_page_no, mtr);
	buf_page_x_lock(new_block, mtr);

	new_page = buf_block_get_frame(new_block);

	n = 0;

	while (n < FSP_SEG_HDRS_PER_PAGE) {
		slot = fsp_seg_hdr_page_get_nth_hdr(new_page, n, mtr);

		mlog_write_dulint(slot + FSEG_ID, ut_dulint_zero,
							MLOG_8BYTES, mtr);
		n++;
	}

	flst_add_last(space_header + FSP_SEG_HDRS_FREE,
				new_page + FSEG_HDR_PAGE_NODE, mtr);

	return(TRUE);
}
/**************************************************************************
Allocates a segment header slot: takes the first unused slot on the first
page of the FSP_SEG_HDRS_FREE list, allocating a new segment header page
first if that list is empty. If the slot is the last unused one on its
page, the page is moved to the FSP_SEG_HDRS_FULL list. The slot itself is
not written to here. */
static
fseg_header_t*
fsp_alloc_seg_header(
/*=================*/
					/* out: segment header, or NULL if
					not enough space */
	fsp_header_t*	space_header,	/* in: space header */
	mtr_t*		mtr)		/* in: mini-transaction handle */
{
	fil_addr_t	first_free_page;
	buf_block_t*	hdr_block;
	page_t*		hdr_page;
	fseg_header_t*	slot;
	ulint		slot_no;

	if (0 == flst_get_len(space_header + FSP_SEG_HDRS_FREE, mtr)) {
		/* No page with an unused slot: allocate a new segment
		header page */
		if (!fsp_alloc_seg_hdr_page(space_header, mtr)) {

			return(NULL);
		}
	}

	first_free_page = flst_get_first(space_header + FSP_SEG_HDRS_FREE,
									mtr);

	hdr_block = buf_page_get(buf_frame_get_space(space_header),
						first_free_page.page, mtr);
	buf_page_x_lock(hdr_block, mtr);

	hdr_page = buf_block_get_frame(hdr_block);

	slot_no = fsp_seg_hdr_page_find_free(hdr_page, 0, mtr);

	ut_a(slot_no != ULINT_UNDEFINED);

	slot = fsp_seg_hdr_page_get_nth_hdr(hdr_page, slot_no, mtr);

	if (fsp_seg_hdr_page_find_free(hdr_page, slot_no + 1, mtr)
						== ULINT_UNDEFINED) {
		/* There are no other unused headers left on the page: move it
		to another list */
		flst_remove(space_header + FSP_SEG_HDRS_FREE,
					hdr_page + FSEG_HDR_PAGE_NODE, mtr);

		flst_add_last(space_header + FSP_SEG_HDRS_FULL,
					hdr_page + FSEG_HDR_PAGE_NODE, mtr);
	}

	return(slot);
}
/**************************************************************************
Frees a file segment header slot: zeroes its segment id and magic number.
If the page holding the slot had no unused slots before the call, the page
is moved from the FSP_SEG_HDRS_FULL list to the FSP_SEG_HDRS_FREE list. If
no used slots remain on the page afterwards, the page is removed from the
FSP_SEG_HDRS_FREE list and freed to the space. */
static
void
fsp_free_seg_header(
/*================*/
	ulint		space,	/* in: space id */
	fseg_header_t*	header,	/* in: segment header */
	mtr_t*		mtr)	/* in: mini-transaction handle */
{
	page_t*		page;
	fsp_header_t*	space_header;

	page = buf_frame_align(header);

	space_header = fsp_get_space_header(space, mtr);

	ut_ad(mach_read_from_4(header + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);

	/* The search has to start from slot 0 to cover the whole page */

	if (ULINT_UNDEFINED == fsp_seg_hdr_page_find_free(page, 0, mtr)) {

		/* Move the page to another list */

		flst_remove(space_header + FSP_SEG_HDRS_FULL,
				page + FSEG_HDR_PAGE_NODE, mtr);

		flst_add_last(space_header + FSP_SEG_HDRS_FREE,
				page + FSEG_HDR_PAGE_NODE, mtr);
	}

	mlog_write_dulint(header + FSEG_ID, ut_dulint_zero, MLOG_8BYTES, mtr);
	mlog_write_ulint(header + FSEG_MAGIC_N, 0, MLOG_4BYTES, mtr);

	if (ULINT_UNDEFINED == fsp_seg_hdr_page_find_used(page, mtr)) {

		/* There are no other used headers left on the page: free it */

		flst_remove(space_header + FSP_SEG_HDRS_FREE,
				page + FSEG_HDR_PAGE_NODE, mtr);

		/* The page number is derived from the frame, in the same
		way as fseg_create does it */

		fsp_free_page(space, buf_frame_get_page(page), mtr);
	}
}
/**************************************************************************
Reads the page number stored in the nth slot of the fragment page array
of a segment header. */
UNIV_INLINE
ulint
fseg_get_nth_frag_page_no(
/*======================*/
				/* out: page number, FIL_NULL if not in use */
	fseg_header_t*	header,	/* in: segment header */
	ulint		n,	/* in: slot index */
	mtr_t*		mtr)	/* in: mtr handle */
{
	fseg_header_t*	slot;

	ut_ad(header && mtr);
	ut_ad(n < FSEG_FRAG_ARR_N_SLOTS);
	ut_ad(mtr_memo_contains(mtr, buf_block_align(header),
						MTR_MEMO_PAGE_X_LOCK));

	slot = header + FSEG_FRAG_ARR + n * FSEG_FRAG_SLOT_SIZE;

	return(mach_read_from_4(slot));
}
/**************************************************************************
Writes a page number to the nth slot of the fragment page array of a
segment header, logging the write to the mini-transaction. */
UNIV_INLINE
void
fseg_set_nth_frag_page_no(
/*======================*/
	fseg_header_t*	header,	/* in: segment header */
	ulint		n,	/* in: slot index */
	ulint		page_no,/* in: page number to set */
	mtr_t*		mtr)	/* in: mtr handle */
{
	fseg_header_t*	slot;

	ut_ad(header && mtr);
	ut_ad(n < FSEG_FRAG_ARR_N_SLOTS);
	ut_ad(mtr_memo_contains(mtr, buf_block_align(header),
						MTR_MEMO_PAGE_X_LOCK));

	slot = header + FSEG_FRAG_ARR + n * FSEG_FRAG_SLOT_SIZE;

	mlog_write_ulint(slot, page_no, MLOG_4BYTES, mtr);
}
/**************************************************************************
Searches the fragment page array of a segment, from the start, for the
first slot which contains FIL_NULL, i.e., which is free. */
static
ulint
fseg_find_free_frag_page_slot(
/*==========================*/
				/* out: slot index; ULINT_UNDEFINED if none
				found */
	fseg_header_t*	header,	/* in: segment header */
	mtr_t*		mtr)	/* in: mtr handle */
{
	ulint	slot = 0;

	ut_ad(header && mtr);

	while (slot < FSEG_FRAG_ARR_N_SLOTS) {

		if (FIL_NULL == fseg_get_nth_frag_page_no(header, slot, mtr)) {

			return(slot);
		}

		slot++;
	}

	return(ULINT_UNDEFINED);
}
/**************************************************************************
Finds a fragment page slot which is used and last in the array. The array
is scanned from its last slot downward, and the index of the first slot
met which does not contain FIL_NULL is returned. */
static
ulint
fseg_find_last_used_frag_page_slot(
/*===============================*/
				/* out: slot index; ULINT_UNDEFINED if none
				found */
	fseg_header_t*	header,	/* in: segment header */
	mtr_t*		mtr)	/* in: mtr handle */
{
	ulint	i;
	ulint	slot;
	ulint	page_no;

	ut_ad(header && mtr);

	for (i = 0; i < FSEG_FRAG_ARR_N_SLOTS; i++) {

		/* i counts the slots inspected so far; the slot actually
		looked at is counted from the end of the array */

		slot = FSEG_FRAG_ARR_N_SLOTS - i - 1;

		page_no = fseg_get_nth_frag_page_no(header, slot, mtr);

		if (page_no != FIL_NULL) {
			/* Return the index of the slot itself, not the
			number of slots inspected */

			return(slot);
		}
	}

	return(ULINT_UNDEFINED);
}
/**************************************************************************
Counts the slots in the fragment page array of a segment which hold a page
number, i.e., which do not contain FIL_NULL. */
static
ulint
fseg_get_n_frag_pages(
/*==================*/
				/* out: number of fragment pages */
	fseg_header_t*	header,	/* in: segment header */
	mtr_t*		mtr)	/* in: mtr handle */
{
	ulint	n_used	= 0;
	ulint	slot;

	ut_ad(header && mtr);

	for (slot = 0; slot < FSEG_FRAG_ARR_N_SLOTS; slot++) {

		if (fseg_get_nth_frag_page_no(header, slot, mtr)
		    == FIL_NULL) {
			/* Free slot: does not count */

			continue;
		}

		n_used++;
	}

	return(n_used);
}
/**************************************************************************
Creates a new segment: reserves a segment header slot, assigns the next
segment id of the space to it, and initializes the extent lists and the
fragment page array of the segment as empty. Takes an x-lock on the
file space latch. */
ulint
fseg_create(
/*========*/
			/* out: the page number where the segment header is
			placed, FIL_NULL if could not create segment because
			lack of space */
	ulint	space,	/* in: space id */
	ulint*	offset, /* out: byte offset of the segment header on its
			page */
	mtr_t*	mtr)	/* in: mtr */
{
	fsp_header_t*	space_header;
	fseg_header_t*	header;
	buf_frame_t*	hdr_frame;
	dulint		seg_id;
	ulint		slot;

	ut_ad(mtr);

	mtr_x_lock(&fsp_latch, mtr);

	space_header = fsp_get_space_header(space, mtr);

	header = fsp_alloc_seg_header(space_header, mtr);

	if (NULL == header) {
		/* No room for a new segment header */

		return(FIL_NULL);
	}

	/* Take the next free segment id from the space header, and store
	the id incremented by one back there */

	seg_id = mtr_read_dulint(space_header + FSP_SEG_ID, MLOG_8BYTES, mtr);

	mlog_write_dulint(space_header + FSP_SEG_ID,
			ut_dulint_add(seg_id, 1), MLOG_8BYTES, mtr);

	/* Initialize the fields of the segment header */

	mlog_write_dulint(header + FSEG_ID, seg_id, MLOG_8BYTES, mtr);
	mlog_write_ulint(header + FSEG_NOT_FULL_N_USED, 0, MLOG_4BYTES, mtr);

	flst_init(header + FSEG_FREE, mtr);
	flst_init(header + FSEG_NOT_FULL, mtr);
	flst_init(header + FSEG_FULL, mtr);

	mlog_write_ulint(header + FSEG_MAGIC_N, FSEG_MAGIC_N_VALUE,
							MLOG_4BYTES, mtr);

	/* No fragment pages yet: every slot gets FIL_NULL */

	for (slot = 0; slot < FSEG_FRAG_ARR_N_SLOTS; slot++) {
		fseg_set_nth_frag_page_no(header, slot, FIL_NULL, mtr);
	}

	hdr_frame = buf_frame_align(header);

	*offset = header - hdr_frame;

	return(buf_frame_get_page(hdr_frame));
}
/**************************************************************************
Calculates the number of pages reserved by a segment, and how many of
them are currently used. Takes an x-lock on the file space latch and
then delegates to fseg_n_reserved_pages_low. */
ulint
fseg_n_reserved_pages(
/*==================*/
				/* out: number of reserved pages */
	fseg_header_t*	header,	/* in: segment header */
	ulint*		used,	/* out: number of pages used (<= reserved) */
	mtr_t*		mtr)	/* in: mtr handle */
{
	mtr_x_lock(&fsp_latch, mtr);

	return(fseg_n_reserved_pages_low(header, used, mtr));
}
/**************************************************************************
Calculates the number of pages reserved by a segment, and how many of
them are currently used. Reserved pages are the fragment pages plus all
pages in the extents of the three extent lists of the segment; used pages
are the fragment pages, all pages of the full extents, and the value of
FSEG_NOT_FULL_N_USED. */
static
ulint
fseg_n_reserved_pages_low(
/*======================*/
				/* out: number of reserved pages */
	fseg_header_t*	header,	/* in: segment header */
	ulint*		used,	/* out: number of pages used (<= reserved) */
	mtr_t*		mtr)	/* in: mtr handle */
{
	ulint	not_full_n_used;
	ulint	n_frag;
	ulint	n_free_extents;
	ulint	n_not_full_extents;
	ulint	n_full_extents;

	ut_ad(header && used && mtr);
	ut_ad(mtr_memo_contains(mtr, buf_block_align(header),
						MTR_MEMO_BUF_FIX));

	buf_page_x_lock(buf_block_align(header), mtr);

	not_full_n_used = mtr_read_ulint(header + FSEG_NOT_FULL_N_USED,
							MLOG_4BYTES, mtr);
	n_full_extents = flst_get_len(header + FSEG_FULL, mtr);
	n_frag = fseg_get_n_frag_pages(header, mtr);

	*used = not_full_n_used
		+ FSP_EXTENT_SIZE * n_full_extents
		+ n_frag;

	n_free_extents = flst_get_len(header + FSEG_FREE, mtr);
	n_not_full_extents = flst_get_len(header + FSEG_NOT_FULL, mtr);

	return(n_frag
		+ FSP_EXTENT_SIZE * n_free_extents
		+ FSP_EXTENT_SIZE * n_not_full_extents
		+ FSP_EXTENT_SIZE * n_full_extents);
}
/*************************************************************************
Tries to fill the free list of a segment with consecutive free extents.
Nothing is done unless the segment has reserved at least
FSEG_FREE_LIST_LIMIT extents worth of pages and its free list is empty.
At most FSEG_FREE_LIST_MAX_LEN extents are added, starting from the hinted
one; the filling stops at the first extent which has no descriptor or
which is not in the state XDES_FREE. */
static
void
fseg_fill_free_list(
/*================*/
	fseg_header_t*	header,	/* in: segment header */
	ulint		space,	/* in: space id */
	ulint		hint,	/* in: hint which extent would be good as
				the first extent */
	mtr_t*		mtr)	/* in: mtr */
{
	xdes_t*	descr;
	dulint	seg_id;
	ulint	n_reserved;
	ulint	n_used;
	ulint	n_added;

	ut_ad(header && mtr);

	buf_page_x_lock(buf_block_align(header), mtr);

	n_reserved = fseg_n_reserved_pages_low(header, &n_used, mtr);

	if (n_reserved < FSEG_FREE_LIST_LIMIT * FSP_EXTENT_SIZE
	    || flst_get_len(header + FSEG_FREE, mtr) > 0) {
		/* Either the segment is too small to allow extents in the
		free list, or the free list is not empty */

		return;
	}

	n_added = 0;

	while (n_added < FSEG_FREE_LIST_MAX_LEN) {

		descr = xdes_get_descriptor(space, hint, mtr);

		if (descr == NULL
		    || xdes_get_state(descr, mtr) != XDES_FREE) {
			/* The desired extent is not available: stop */

			return;
		}

		descr = fsp_alloc_free_extent(space, hint, mtr);

		/* Assign the extent to this segment and put it to the
		segment free list */

		xdes_set_state(descr, XDES_FSEG, mtr);

		seg_id = mtr_read_dulint(header + FSEG_ID, MLOG_8BYTES, mtr);
		mlog_write_dulint(descr + XDES_ID, seg_id, MLOG_8BYTES, mtr);

		flst_add_last(header + FSEG_FREE,
					descr + XDES_FLST_NODE, mtr);

		/* Continue from the next consecutive extent */

		hint += FSP_EXTENT_SIZE;
		n_added++;
	}
}
/*************************************************************************
Gives a free extent for the segment. If the free list of the segment is
not empty, its first extent is returned. Otherwise an extent is allocated
from the space, assigned to the segment, added to the segment free list,
and an attempt is made to fill the free list with the extents following
it. NOTE that the extent returned is in both cases still placed in the
segment free list, not taken off it! */
static
xdes_t*
fseg_alloc_free_extent(
/*===================*/
				/* out: allocated extent, still placed in the
				segment free list, NULL if could
				not be allocated */
	fseg_header_t*	header,	/* in: segment header */
	ulint		space,	/* in: space id */
	mtr_t*		mtr)	/* in: mtr */
{
	xdes_t*		extent;
	dulint		seg_id;
	fil_addr_t	first_addr;

	buf_page_x_lock(buf_block_align(header), mtr);

	if (flst_get_len(header + FSEG_FREE, mtr) > 0) {
		/* The segment already owns a free extent: use the first
		one in its free list */

		first_addr = flst_get_first(header + FSEG_FREE, mtr);

		return(xdes_lst_get_descriptor(space, first_addr, mtr));
	}

	/* The segment free list is empty: turn to the space */

	extent = fsp_alloc_free_extent(space, 0, mtr);

	if (extent == NULL) {

		return(NULL);
	}

	seg_id = mtr_read_dulint(header + FSEG_ID, MLOG_8BYTES, mtr);

	xdes_set_state(extent, XDES_FSEG, mtr);
	mlog_write_dulint(extent + XDES_ID, seg_id, MLOG_8BYTES, mtr);

	flst_add_last(header + FSEG_FREE, extent + XDES_FLST_NODE, mtr);

	/* Try to get also the following extents to the free list */

	fseg_fill_free_list(header, space,
			xdes_get_offset(extent) + FSP_EXTENT_SIZE, mtr);

	return(extent);
}
/**************************************************************************
Allocates a single free page from a segment. This function implements
the intelligent allocation strategy which tries to minimize file space
fragmentation. The page is looked for in the following order; the first
alternative whose conditions hold is taken:
1. the hinted page, if its extent belongs to the segment and the page is
free;
2. the hinted page, if its extent is free in the space and the segment is
filled enough to deserve a new extent;
3. if a direction is given and the segment is filled enough, the lowest or
the highest page of a free extent of the segment;
4. a free page in the extent of the hinted page, if the extent belongs to
the segment;
5. any unused page in the extents of the segment;
6. an individual (fragment) page from the space, if the segment uses fewer
than FSEG_FRAG_LIMIT pages;
7. the first page of a newly allocated extent.
Takes an x-lock on the file space latch. */
ulint
fseg_alloc_free_page(
/*=================*/
				/* out: the allocated page offset
				FIL_NULL if no page could be allocated */
	fseg_header_t*	seg_header, /* in: segment header */
	ulint		hint,	/* in: hint of which page would be desirable */
	byte		direction, /* in: if the new page is needed because
				of an index page split, and records are
				inserted there in order, into which
				direction they go alphabetically: FSP_DOWN,
				FSP_UP, FSP_NO_DIR */
	mtr_t*		mtr)	/* in: mtr handle */
{
	buf_block_t*	block;
	dulint		seg_id;
	ulint		space;
	ulint		used;
	ulint		reserved;
	fil_addr_t	first;
	xdes_t*		descr;		/* extent of the hinted page */
	ulint		ret_page;	/* the allocated page offset, FIL_NULL
					if could not be allocated */
	buf_block_t*	ret_buf_page;
	xdes_t*		ret_descr;	/* the extent of the allocated page */
	ulint		n;
	bool		frag_page_allocated = FALSE;

	ut_ad(seg_header && mtr);
	ut_ad((direction >= FSP_UP) && (direction <= FSP_NO_DIR));

	mtr_x_lock(&fsp_latch, mtr);

	block = buf_block_align(seg_header);
	buf_page_x_lock(block, mtr);

	space = buf_page_get_space(block);

	seg_id = mtr_read_dulint(seg_header + FSEG_ID, MLOG_8BYTES, mtr);

	ut_ad(ut_dulint_cmp(seg_id, ut_dulint_zero) > 0);

	reserved = fseg_n_reserved_pages_low(seg_header, &used, mtr);

	descr = xdes_get_descriptor(space, hint, mtr);

	if (descr == NULL) {
		/* Hint outside space or too high above free limit:
		reset hint */

		hint = 0;
		descr = xdes_get_descriptor(space, hint, mtr);
	}

	/* In the big if-else below we look for ret_page and ret_descr */
	/*-------------------------------------------------------------*/
	if ((xdes_get_state(descr, mtr) == XDES_FSEG)
	    && (0 == ut_dulint_cmp(mtr_read_dulint(descr + XDES_ID,
							MLOG_8BYTES, mtr),
					seg_id))
	    && (xdes_get_bit(descr, XDES_FREE_BIT,
				hint % FSP_EXTENT_SIZE, mtr) == TRUE)) {

		/* 1. We can take the hinted page
		=================================*/
		ret_descr = descr;
		ret_page = hint;
	/*-------------------------------------------------------------*/
	} else if ((xdes_get_state(descr, mtr) == XDES_FREE)
		   && ((reserved - used) < reserved / FSEG_FILLFACTOR)
		   && (used >= FSEG_FRAG_LIMIT)) {

		/* 2. We allocate the free extent from space and can take
		=========================================================
		the hinted page
		===============*/
		ret_descr = fsp_alloc_free_extent(space, hint, mtr);

		ut_a(ret_descr == descr);

		xdes_set_state(ret_descr, XDES_FSEG, mtr);
		mlog_write_dulint(ret_descr + XDES_ID, seg_id, MLOG_8BYTES,
									mtr);
		flst_add_last(seg_header + FSEG_FREE,
					ret_descr + XDES_FLST_NODE, mtr);

		/* Try to fill the segment free list */
		fseg_fill_free_list(seg_header, space,
					hint + FSP_EXTENT_SIZE, mtr);
		ret_page = hint;
	/*-------------------------------------------------------------*/
	} else if ((direction != FSP_NO_DIR)
		   && ((reserved - used) < reserved / FSEG_FILLFACTOR)
		   && (used >= FSEG_FRAG_LIMIT)
		   && (NULL != (ret_descr =
			fseg_alloc_free_extent(seg_header, space, mtr)))) {

		/* 3. We take any free extent (which was already assigned above
		===============================================================
		in the if-condition to ret_descr) and take the lowest or
		========================================================
		highest page in it, depending on the direction
		==============================================*/
		ret_page = xdes_get_offset(ret_descr);

		if (direction == FSP_DOWN) {
			ret_page += FSP_EXTENT_SIZE - 1;
		}
	/*-------------------------------------------------------------*/
	} else if ((xdes_get_state(descr, mtr) == XDES_FSEG)
		   && (0 == ut_dulint_cmp(mtr_read_dulint(descr + XDES_ID,
							MLOG_8BYTES, mtr),
					seg_id))
		   && (!xdes_is_full(descr, mtr))) {

		/* 4. We can take the page from the same extent as the
		======================================================
		hinted page (and the extent already belongs to the
		==================================================
		segment)
		========*/
		ret_descr = descr;
		ret_page = xdes_get_offset(ret_descr) +
				xdes_find_bit(ret_descr, XDES_FREE_BIT, TRUE,
					hint % FSP_EXTENT_SIZE, mtr);
	/*-------------------------------------------------------------*/
	} else if (reserved - used > 0) {
		/* 5. We take any unused page from the segment
		==============================================*/
		if (flst_get_len(seg_header + FSEG_NOT_FULL, mtr) > 0) {
			first = flst_get_first(seg_header + FSEG_NOT_FULL,
									mtr);
		} else if (flst_get_len(seg_header + FSEG_FREE, mtr) > 0) {
			first = flst_get_first(seg_header + FSEG_FREE, mtr);
		} else {
			/* Unused reserved pages can only reside in the
			extents of these two lists */
			ut_error;
		}

		ret_descr = xdes_lst_get_descriptor(space, first, mtr);
		ret_page = xdes_get_offset(ret_descr) +
				xdes_find_bit(ret_descr, XDES_FREE_BIT, TRUE,
								0, mtr);
	/*-------------------------------------------------------------*/
	} else if (used < FSEG_FRAG_LIMIT) {
		/* 6. We allocate an individual page from the space
		===================================================*/
		ret_page = fsp_alloc_free_page(space, hint, mtr);
		ret_descr = NULL;

		frag_page_allocated = TRUE;

		if (ret_page != FIL_NULL) {
			/* Put the page in the fragment page array of the
			segment */
			n = fseg_find_free_frag_page_slot(seg_header, mtr);

			/* The slot search reports failure with
			ULINT_UNDEFINED, not with FIL_NULL */
			ut_a(n != ULINT_UNDEFINED);

			fseg_set_nth_frag_page_no(seg_header, n, ret_page,
									mtr);
		}
	/*-------------------------------------------------------------*/
	} else {
		/* 7. We allocate a new extent and take its first page
		======================================================*/
		ret_descr = fseg_alloc_free_extent(seg_header, space, mtr);

		if (ret_descr == NULL) {
			ret_page = FIL_NULL;
		} else {
			ret_page = xdes_get_offset(ret_descr);
		}
	}

	if (ret_page == FIL_NULL) {
		/* Page could not be allocated */

		return(FIL_NULL);
	}

	/* Initialize the allocated page to buffer pool, so that it can be
	obtained immediately with buf_page_get without need for disk read */

	ret_buf_page = buf_page_create(space, ret_page, mtr);

	if (!frag_page_allocated) {
		/* At this point we know the extent and the page offset.
		The extent is still in the appropriate list (FSEG_NOT_FULL or
		FSEG_FREE), and the page is not yet marked as used. */

		ut_ad(xdes_get_descriptor(space, ret_page, mtr) == ret_descr);
		ut_ad(xdes_get_bit(ret_descr, XDES_FREE_BIT,
				ret_page % FSP_EXTENT_SIZE, mtr) == TRUE);

		fseg_mark_page_used(seg_header, space, ret_page, mtr);
	}

	return(ret_page);
}
/************************************************************************
Marks a page used. The page must reside within the extents of the given
segment. Clears the free bit of the page in its extent descriptor,
increments FSEG_NOT_FULL_N_USED, and moves the extent between the
FSEG_FREE, FSEG_NOT_FULL and FSEG_FULL lists of the segment when the
extent stops being completely free or becomes full. */
static
void
fseg_mark_page_used(
/*================*/
	fseg_header_t*	seg_header,/* in: segment header */
	ulint		space,	/* in: space id */
	ulint		page,	/* in: page offset */
	mtr_t*		mtr)	/* in: mtr */
{
	xdes_t*	descr;
	ulint	not_full_n_used;

	ut_ad(seg_header && mtr);

	descr = xdes_get_descriptor(space, page, mtr);

	/* The extent must belong to this segment: compare the whole
	8-byte ids, as fseg_free_page_low and fseg_free_extent do */

	ut_ad(0 == ut_dulint_cmp(
		mtr_read_dulint(seg_header + FSEG_ID, MLOG_8BYTES, mtr),
		mtr_read_dulint(descr + XDES_ID, MLOG_8BYTES, mtr)));

	if (xdes_is_free(descr, mtr)) {
		/* We move the extent from the free list to the
		NOT_FULL list */

		flst_remove(seg_header + FSEG_FREE,
					descr + XDES_FLST_NODE, mtr);
		flst_add_last(seg_header + FSEG_NOT_FULL,
					descr + XDES_FLST_NODE, mtr);
	}

	ut_ad(xdes_get_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, mtr)
								== TRUE);

	/* We mark the page as used */

	xdes_set_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, FALSE, mtr);

	not_full_n_used = mtr_read_ulint(seg_header + FSEG_NOT_FULL_N_USED,
							MLOG_4BYTES, mtr);
	not_full_n_used++;

	mlog_write_ulint(seg_header + FSEG_NOT_FULL_N_USED,
					not_full_n_used, MLOG_4BYTES, mtr);

	if (xdes_is_full(descr, mtr)) {
		/* We move the extent from the NOT_FULL list to the
		FULL list */

		flst_remove(seg_header + FSEG_NOT_FULL,
					descr + XDES_FLST_NODE, mtr);
		flst_add_last(seg_header + FSEG_FULL,
					descr + XDES_FLST_NODE, mtr);

		/* The pages of the extent are no longer counted in
		FSEG_NOT_FULL_N_USED */

		mlog_write_ulint(seg_header + FSEG_NOT_FULL_N_USED,
					not_full_n_used - FSP_EXTENT_SIZE,
					MLOG_4BYTES, mtr);
	}
}
/**************************************************************************
Frees a single page of a segment. Takes an x-lock on the file space latch
and lets fseg_free_page_low do the actual freeing. */
void
fseg_free_page(
/*===========*/
	fseg_header_t*	seg_header, /* in: segment header */
	ulint		space,	/* in: space id */
	ulint		page,	/* in: page offset */
	mtr_t*		mtr)	/* in: mtr handle */
{
	mtr_x_lock(&fsp_latch, mtr);

	/* The file space latch is now held in x-mode */

	fseg_free_page_low(seg_header, space, page, mtr);
}
/**************************************************************************
Frees a single page of a segment. If the extent of the page is not in the
state XDES_FSEG, the page is taken to be a fragment page: its slot in the
fragment page array is reset to FIL_NULL and the page is freed to the
space. Otherwise the page is marked free in its extent descriptor,
FSEG_NOT_FULL_N_USED is updated, the extent is moved from the FSEG_FULL
list to the FSEG_NOT_FULL list if it was full, and the extent is freed to
the space if it became completely free. */
static
void
fseg_free_page_low(
/*===============*/
	fseg_header_t*	seg_header, /* in: segment header */
	ulint		space,	/* in: space id */
	ulint		page,	/* in: page offset */
	mtr_t*		mtr)	/* in: mtr handle */
{
	buf_block_t*	block;
	xdes_t*		descr;
	ulint		not_full_n_used;
	ulint		state;
	ulint		i;

	ut_ad(seg_header && mtr);

	block = buf_block_align(seg_header);
	buf_page_x_lock(block, mtr);

	descr = xdes_get_descriptor(space, page, mtr);

	ut_a(descr);

	/* The page to be freed must currently be marked as used */
	ut_a(xdes_get_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, mtr)
								== FALSE);

	state = xdes_get_state(descr, mtr);

	if (state != XDES_FSEG) {
		/* The page is in the fragment pages of the segment */

		for (i = 0;; i++) {
			/* The page has to be found in the array: stop
			here rather than search past the last slot */
			ut_a(i < FSEG_FRAG_ARR_N_SLOTS);

			if (fseg_get_nth_frag_page_no(seg_header, i, mtr)
			    == page) {

				fseg_set_nth_frag_page_no(seg_header, i,
							FIL_NULL, mtr);
				break;
			}
		}

		fsp_free_page(space, page, mtr);

		return;
	}

	/* If we get here, the page is in some extent of the segment */

	ut_a(0 == ut_dulint_cmp(
		mtr_read_dulint(descr + XDES_ID, MLOG_8BYTES, mtr),
		mtr_read_dulint(seg_header + FSEG_ID, MLOG_8BYTES, mtr)));

	not_full_n_used = mtr_read_ulint(seg_header + FSEG_NOT_FULL_N_USED,
							MLOG_4BYTES, mtr);

	if (xdes_is_full(descr, mtr)) {
		/* The extent is full: move it to the NOT_FULL list. Its
		remaining used pages are from now on counted in
		FSEG_NOT_FULL_N_USED */

		flst_remove(seg_header + FSEG_FULL,
					descr + XDES_FLST_NODE, mtr);
		flst_add_last(seg_header + FSEG_NOT_FULL,
					descr + XDES_FLST_NODE, mtr);

		mlog_write_ulint(seg_header + FSEG_NOT_FULL_N_USED,
				not_full_n_used + FSP_EXTENT_SIZE - 1,
				MLOG_4BYTES, mtr);
	} else {
		ut_a(not_full_n_used > 0);

		mlog_write_ulint(seg_header + FSEG_NOT_FULL_N_USED,
				not_full_n_used - 1,
				MLOG_4BYTES, mtr);
	}

	xdes_set_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, TRUE, mtr);
	xdes_set_bit(descr, XDES_CLEAN_BIT, page % FSP_EXTENT_SIZE, TRUE, mtr);

	if (xdes_is_free(descr, mtr)) {
		/* The extent has become free: free it to space */

		flst_remove(seg_header + FSEG_NOT_FULL,
					descr + XDES_FLST_NODE, mtr);

		fsp_free_extent(space, page, mtr);
	}
}
/**************************************************************************
Removes an extent of a segment from the segment list it resides in, and
frees the extent to the space. If the extent was in the FSEG_NOT_FULL
list, the number of its used pages is subtracted from
FSEG_NOT_FULL_N_USED. */
static
void
fseg_free_extent(
/*=============*/
	fseg_header_t*	seg_header, /* in: segment header */
	ulint		space,	/* in: space id */
	ulint		page,	/* in: page offset in the extent */
	mtr_t*		mtr)	/* in: mtr handle */
{
	xdes_t*	extent;
	ulint	n_used_total;
	ulint	n_used_here;

	ut_ad(seg_header && mtr);

	buf_page_x_lock(buf_block_align(seg_header), mtr);

	extent = xdes_get_descriptor(space, page, mtr);

	/* The extent must be assigned to this very segment */

	ut_a(XDES_FSEG == xdes_get_state(extent, mtr));
	ut_a(0 == ut_dulint_cmp(
		mtr_read_dulint(extent + XDES_ID, MLOG_8BYTES, mtr),
		mtr_read_dulint(seg_header + FSEG_ID, MLOG_8BYTES, mtr)));

	if (xdes_is_full(extent, mtr)) {

		flst_remove(seg_header + FSEG_FULL,
					extent + XDES_FLST_NODE, mtr);

	} else if (xdes_is_free(extent, mtr)) {

		flst_remove(seg_header + FSEG_FREE,
					extent + XDES_FLST_NODE, mtr);
	} else {
		/* A partially used extent: also its used pages have to
		be taken off the counter of the NOT_FULL list */

		flst_remove(seg_header + FSEG_NOT_FULL,
					extent + XDES_FLST_NODE, mtr);

		n_used_total = mtr_read_ulint(
					seg_header + FSEG_NOT_FULL_N_USED,
					MLOG_4BYTES, mtr);

		n_used_here = xdes_get_n_used(extent, mtr);

		ut_a(n_used_total >= n_used_here);

		mlog_write_ulint(seg_header + FSEG_NOT_FULL_N_USED,
					n_used_total - n_used_here,
					MLOG_4BYTES, mtr);
	}

	fsp_free_extent(space, page, mtr);
}
/**************************************************************************
Frees part of a segment. This function can be used to free a segment
by repeatedly calling this function in different mini-transactions.
Doing the freeing in a single mini-transaction might result in too big
a mini-transaction. One call frees one extent of the segment, if it has
any; otherwise one fragment page, the last used one in the fragment page
array; and when neither is left, the segment header itself. Takes an
x-lock on the file space latch. */
bool
fseg_free_step(
/*===========*/
			/* out: TRUE if freeing completed */
	ulint	space,	/* in: segment space id */
	ulint	page_no,/* in: segment header page number */
	ulint	offset,	/* in: segment header byte offset on page */
	mtr_t*	mtr)	/* in: mtr */
{
	ulint		n;
	ulint		page;
	xdes_t*		descr;
	fseg_header_t*	header;
	fil_addr_t	header_addr;

	header_addr.page = page_no;
	header_addr.boffset = offset;

	mtr_x_lock(&fsp_latch, mtr);

	header = fut_get_ptr_x_lock(space, header_addr, mtr);

	descr = fseg_get_first_extent(header, mtr);

	if (descr != NULL) {
		/* Free the extent held by the segment */

		page = xdes_get_offset(descr);

		fseg_free_extent(header, space, page, mtr);

		return(FALSE);
	}

	/* Free a frag page */

	n = fseg_find_last_used_frag_page_slot(header, mtr);

	if (n == ULINT_UNDEFINED) {
		/* Freeing completed: free the segment header */

		fsp_free_seg_header(space, header, mtr);

		return(TRUE);
	}

	fseg_free_page_low(header, space,
			fseg_get_nth_frag_page_no(header, n, mtr), mtr);

	return(FALSE);
}
/***********************************************************************
Frees a segment. The freeing is performed in several mini-transactions,
so that there is no danger of bufferfixing too many buffer pages: each
round starts a mini-transaction, runs one fseg_free_step in it, and
commits it, until fseg_free_step reports that the freeing is completed. */
void
fseg_free(
/*======*/
	ulint	space,	/* in: space id */
	ulint	page_no,/* in: page number where the segment header is
			placed */
	ulint	offset)	/* in: byte offset of the segment header on that
			page */
{
	mtr_t		mtr;
	buf_block_t*	hdr_block;
	bool		done;

	do {
		mtr_start(&mtr);

		/* Get the segment header page within this
		mini-transaction before taking the freeing step */

		hdr_block = buf_page_get(space, page_no, &mtr);

		done = fseg_free_step(space, page_no, offset, &mtr);

		mtr_commit(&mtr);

	} while (!done);
}
/**************************************************************************
Returns the first extent descriptor for a segment. We think of the extent
lists of the segment catenated in the order FSEG_FULL -> FSEG_NOT_FULL
-> FSEG_FREE, and take the first node of the first non-empty list. */
static
xdes_t*
fseg_get_first_extent(
/*==================*/
				/* out: the first extent descriptor, or NULL if
				none */
	fseg_header_t*	header,	/* in: segment header */
	mtr_t*		mtr)	/* in: mtr */
{
	buf_block_t*	hdr_block;
	fil_addr_t	addr;
	ulint		space;

	ut_ad(header && mtr);

	hdr_block = buf_block_align(header);
	buf_page_x_lock(hdr_block, mtr);

	space = buf_page_get_space(hdr_block);

	/* Stays null if all three lists are empty */
	addr = fil_addr_null;

	if (flst_get_len(header + FSEG_FULL, mtr) > 0) {

		addr = flst_get_first(header + FSEG_FULL, mtr);

	} else if (flst_get_len(header + FSEG_NOT_FULL, mtr) > 0) {

		addr = flst_get_first(header + FSEG_NOT_FULL, mtr);

	} else if (flst_get_len(header + FSEG_FREE, mtr) > 0) {

		addr = flst_get_first(header + FSEG_FREE, mtr);
	}

	if (addr.page != FIL_NULL) {

		return(xdes_lst_get_descriptor(space, addr, mtr));
	}

	return(NULL);
}
#ifdef notdefined
/**************************************************************************
Returns the last non-free extent descriptor for a segment. We think of
the extent lists of the segment catenated in the order FSEG_FULL ->
FSEG_NOT_FULL -> FSEG_FREE: the last node of the FSEG_NOT_FULL list is
taken if that list is not empty, else the last node of the FSEG_FULL
list. */
static
xdes_t*
fseg_get_last_non_free_extent(
/*==========================*/
				/* out: the last extent descriptor, or NULL if
				none */
	fseg_header_t*	header,	/* in: segment header */
	mtr_t*		mtr)	/* in: mtr */
{
	buf_block_t*	hdr_block;
	fil_addr_t	addr;
	ulint		space;

	ut_ad(header && mtr);

	hdr_block = buf_block_align(header);
	buf_page_x_lock(hdr_block, mtr);

	space = buf_page_get_space(hdr_block);

	/* Stays null if both lists are empty */
	addr = fil_addr_null;

	if (flst_get_len(header + FSEG_NOT_FULL, mtr) > 0) {

		addr = flst_get_last(header + FSEG_NOT_FULL, mtr);

	} else if (flst_get_len(header + FSEG_FULL, mtr) > 0) {

		addr = flst_get_last(header + FSEG_FULL, mtr);
	}

	if (addr.page != FIL_NULL) {

		return(xdes_lst_get_descriptor(space, addr, mtr));
	}

	return(NULL);
}
/**************************************************************************
Returns the next extent descriptor for a segment. We think of the extent
lists of the segment catenated in the order FSEG_FULL -> FSEG_NOT_FULL
-> FSEG_FREE: if the given extent is the last one in its own list, the
search continues from the start of the next non-empty list in this
order. */
static
xdes_t*
fseg_get_next_extent(
/*=================*/
				/* out: next extent descriptor, or NULL if
				none */
	fseg_header_t*	header,	/* in: segment header */
	xdes_t*		descr,	/* in: previous extent descriptor */
	mtr_t*		mtr)	/* in: mtr */
{
	fil_addr_t	addr;
	buf_block_t*	hdr_block;
	ulint		space;

	ut_ad(header && descr && mtr);

	hdr_block = buf_block_align(header);
	buf_page_x_lock(hdr_block, mtr);

	space = buf_page_get_space(hdr_block);

	addr = flst_get_next_addr(descr + XDES_FLST_NODE, mtr);

	if (addr.page == FIL_NULL) {
		/* descr is the last extent in its list: move on to the
		following list, if there is a non-empty one */

		if (xdes_is_full(descr, mtr)) {
			/* descr is in FSEG_FULL list */

			if (flst_get_len(header + FSEG_NOT_FULL, mtr) > 0) {

				addr = flst_get_first(header + FSEG_NOT_FULL,
									mtr);

			} else if (flst_get_len(header + FSEG_FREE, mtr)
				   > 0) {

				addr = flst_get_first(header + FSEG_FREE,
									mtr);
			}
		} else if (!xdes_is_full(descr, mtr)
			   && !xdes_is_free(descr, mtr)) {
			/* descr is in FSEG_NOT_FULL list */

			if (flst_get_len(header + FSEG_FREE, mtr) > 0) {

				addr = flst_get_first(header + FSEG_FREE,
									mtr);
			}
		}
	}

	if (addr.page == FIL_NULL) {
		/* There is no successor */

		return(NULL);
	}

	descr = xdes_lst_get_descriptor(space, addr, mtr);

	ut_ad(descr);

	return(descr);
}
/**************************************************************************
Returns the previous extent descriptor for a segment. We think of the
extent lists of the segment catenated in the order FSEG_FULL ->
FSEG_NOT_FULL -> FSEG_FREE: if the given extent is the first one in its
own list, the search continues from the end of the preceding non-empty
list in this order. */
static
xdes_t*
fseg_get_prev_extent(
/*=================*/
				/* out: previous extent descriptor, or NULL if
				none */
	fseg_header_t*	header,	/* in: segment header */
	xdes_t*		descr,	/* in: extent descriptor */
	mtr_t*		mtr)	/* in: mtr */
{
	fil_addr_t	addr;
	buf_block_t*	hdr_block;
	ulint		space;

	ut_ad(header && descr && mtr);

	hdr_block = buf_block_align(header);
	buf_page_x_lock(hdr_block, mtr);

	space = buf_page_get_space(hdr_block);

	addr = flst_get_prev_addr(descr + XDES_FLST_NODE, mtr);

	if (addr.page == FIL_NULL) {
		/* descr is the first extent in its list: move back to the
		preceding list, if there is a non-empty one */

		if (xdes_is_free(descr, mtr)) {
			/* descr is in FSEG_FREE list */

			if (flst_get_len(header + FSEG_NOT_FULL, mtr) > 0) {

				addr = flst_get_last(header + FSEG_NOT_FULL,
									mtr);

			} else if (flst_get_len(header + FSEG_FULL, mtr)
				   > 0) {

				addr = flst_get_last(header + FSEG_FULL,
									mtr);
			}
		} else if (!xdes_is_full(descr, mtr)
			   && !xdes_is_free(descr, mtr)) {
			/* descr is in FSEG_NOT_FULL list */

			if (flst_get_len(header + FSEG_FULL, mtr) > 0) {

				addr = flst_get_last(header + FSEG_FULL,
									mtr);
			}
		}
	}

	if (addr.page == FIL_NULL) {
		/* There is no predecessor */

		return(NULL);
	}

	descr = xdes_lst_get_descriptor(space, addr, mtr);

	ut_ad(descr);

	return(descr);
}
/*************************************************************************
Gets the first used page number in the given extent assigned to a
specific segment. Only the given extent is inspected: if it contains no
used page, it is in the FSEG_FREE list, and no used page can then be found
in its successors, in the order defined in fseg_get_next_extent. */
static
ulint
fseg_extent_get_next_page_no(
/*=========================*/
				/* next used page number in the given extent,
				FIL_NULL if no page found */
	fseg_header_t*	header,	/* in: segment header */
	xdes_t*		descr,	/* in: extent descriptor, if this is NULL, the
				function returns FIL_NULL */
	mtr_t*		mtr)	/* in: mtr */
{
	ulint	first_used_bit;

	UT_NOT_USED(header);

	ut_ad((descr == NULL) || (xdes_get_state(descr, mtr) == XDES_FSEG));

	if (descr == NULL) {

		return(FIL_NULL);
	}

	/* Look for the first page whose free bit is not set */

	first_used_bit = xdes_find_bit(descr, XDES_FREE_BIT, FALSE, 0, mtr);

	if (first_used_bit != ULINT_UNDEFINED) {

		return(xdes_get_offset(descr) + first_used_bit);
	}

	/* No page found in this extent: the extent is in FSEG_FREE list,
	thus, no used page can be found in successors */

	return(FIL_NULL);
}
/*************************************************************************
Gets the last used page number in the given extent assigned to a
specific segment, or in its predecessor extents, in the order defined in
fseg_get_prev_extent. If the page cannot be found from the extents,
the page of the last node in the FSEG_FRAG list is returned, which is
FIL_NULL if that list gives no page. */
static
ulint
fseg_extent_get_prev_page_no(
/*=========================*/
				/* previous used page number in the given
				extent or a predecessor, FIL_NULL
				if no page found */
	fseg_header_t*	header,	/* in: segment header */
	xdes_t*		descr,	/* in: extent descriptor, if this is NULL, the
				function returns the last page of the fragment
				list, if any */
	mtr_t*		mtr)	/* in: mtr */
{
	ulint		found_page_no	= FIL_NULL;
	ulint		last_used_bit;
	fil_addr_t	frag_addr;

	ut_ad((descr == NULL) || (xdes_get_state(descr, mtr) == XDES_FSEG));

	/* Walk the extents backward until one with a used page is met */

	while (descr != NULL) {

		last_used_bit = xdes_find_bit_downward(descr, XDES_FREE_BIT,
					FALSE, FSP_EXTENT_SIZE - 1, mtr);

		if (last_used_bit != ULINT_UNDEFINED) {

			found_page_no = xdes_get_offset(descr)
							+ last_used_bit;
			break;
		}

		descr = fseg_get_prev_extent(header, descr, mtr);
	}

	if (found_page_no == FIL_NULL) {
		/* Nothing in the extents: fall back to the fragment list */

		frag_addr = flst_get_last(header + FSEG_FRAG, mtr);

		found_page_no = frag_addr.page;
	}

	return(found_page_no);
}
/**************************************************************************
Returns the page number of the first segment page: the first page of the
FSEG_FRAG list if there is one, otherwise the first used page of the first
extent of the segment. If no pages have been freed from the segment, and
the pages were allocated with the hint page number always one greater than
the previous page, then this is the first allocated page. */
ulint
fseg_get_first_page_no(
/*===================*/
				/* out: page number, FIL_NULL if no
				page found */
	fseg_header_t*	header,	/* in: segment header */
	mtr_t*		mtr)	/* in: mtr */
{
	buf_block_t*	header_block;
	fil_addr_t	frag_addr;
	xdes_t*		first_extent;

	ut_ad(header);

	mtr_x_lock(&fsp_latch, mtr);

	header_block = buf_block_align(header);
	buf_page_x_lock(header_block, mtr);

	/* Fragment pages come first in the iteration order */
	frag_addr = flst_get_first(header + FSEG_FRAG, mtr);

	if (frag_addr.page != FIL_NULL) {

		return(frag_addr.page);
	}

	/* No fragment pages: look into the extents of the segment */
	first_extent = fseg_get_first_extent(header, mtr);

	return(fseg_extent_get_next_page_no(header, first_extent, mtr));
}
/**************************************************************************
Returns the page number of the last segment page, found by searching
backwards from the last non-free extent of the segment (the helper falls
back to the fragment list when the extents hold no used page). If no pages
have been freed from the segment, and the pages were allocated with the
hint page number always one greater than the previous page, then this is
the last allocated page. */
ulint
fseg_get_last_page_no(
/*==================*/
				/* out: page number, FIL_NULL if no
				page found */
	fseg_header_t*	header,	/* in: segment header */
	mtr_t*		mtr)	/* in: mtr */
{
	buf_block_t*	header_block;
	xdes_t*		last_extent;

	ut_ad(header);

	mtr_x_lock(&fsp_latch, mtr);

	header_block = buf_block_align(header);
	buf_page_x_lock(header_block, mtr);

	last_extent = fseg_get_last_non_free_extent(header, mtr);

	return(fseg_extent_get_prev_page_no(header, last_extent, mtr));
}
/**************************************************************************
Returns the page number of the segment page following the given one.
Fragment pages are stepped through along the fragment list and are followed
by the pages in the extents of the segment. If no pages have been freed
from the segment, and the pages were allocated with the hint page number
always one greater than the previous page, then the pages are stepped
through in the order they were allocated to the segment. */
ulint
fseg_get_next_page_no(
/*==================*/
				/* out: page number, FIL_NULL if no
				page left */
	fseg_header_t*	header,	/* in: segment header */
	ulint		page_no,/* in: previous page number */
	mtr_t*		mtr)	/* in: mtr */
{
	buf_block_t*		block;
	ulint			space;
	ulint			pos;
	ulint			bit;
	xdes_t*			descr;
	fil_addr_t		next_addr;
	fseg_page_header_t*	page_header;

	ut_ad(header);

	mtr_x_lock(&fsp_latch, mtr);

	block = buf_block_align(header);
	buf_page_x_lock(block, mtr);

	space = buf_page_get_space(block);
	descr = xdes_get_descriptor(space, page_no, mtr);

	/* Position of the current page within its extent */
	pos = page_no % FSP_EXTENT_SIZE;

	ut_ad(xdes_get_bit(descr, XDES_FREE_BIT, pos, mtr) == FALSE);

	if (xdes_get_state(descr, mtr) != XDES_FSEG) {
		/* Current page is a fragment page: follow the list node
		stored in its page header */
		block = buf_page_get(space, page_no, mtr);
		buf_page_x_lock(block, mtr);

		page_header = buf_block_get_frame(block)
						+ FSEG_PAGE_HEADER_OFFSET;
		next_addr = flst_get_next_addr(
				page_header + FSEG_PAGE_FRAG_NODE, mtr);

		if (next_addr.page != FIL_NULL) {

			return(next_addr.page);
		}

		/* Fragment list exhausted: continue from the extents */
		descr = fseg_get_first_extent(header, mtr);

		return(fseg_extent_get_next_page_no(header, descr, mtr));
	}

	/* The extent of the current page belongs to the segment */
	bit = xdes_find_bit(descr, XDES_FREE_BIT, FALSE,
				(page_no + 1) % FSP_EXTENT_SIZE, mtr);

	if ((bit != ULINT_UNDEFINED) && (bit > pos)) {

		return(xdes_get_offset(descr) + bit);
	}

	/* No higher address pages in this extent (a hit at or below the
	current position is not a successor) */
	descr = fseg_get_next_extent(header, descr, mtr);

	return(fseg_extent_get_next_page_no(header, descr, mtr));
}
/**************************************************************************
Returns the page number of the segment page preceding the given one. For a
page in an extent of the segment the search goes downward in the extent and
then to the predecessor extents; for a fragment page the previous node of
the fragment list is returned. If no pages have been freed from the
segment, and the pages were allocated with the hint page number always one
greater than the previous page, then the pages are stepped through in the
order opposite to their allocation order. */
ulint
fseg_get_prev_page_no(
/*==================*/
				/* out: page number, FIL_NULL if no page
				left */
	fseg_header_t*	header,	/* in: segment header */
	ulint		page_no,/* in: page number */
	mtr_t*		mtr)	/* in: mtr */
{
	buf_block_t*		block;
	ulint			space;
	ulint			pos;
	ulint			bit;
	xdes_t*			descr;
	fil_addr_t		prev_addr;
	fseg_page_header_t*	page_header;

	ut_ad(header);

	mtr_x_lock(&fsp_latch, mtr);

	block = buf_block_align(header);
	buf_page_x_lock(block, mtr);

	space = buf_page_get_space(block);
	descr = xdes_get_descriptor(space, page_no, mtr);

	/* Position of the current page within its extent */
	pos = page_no % FSP_EXTENT_SIZE;

	ut_ad(xdes_get_bit(descr, XDES_FREE_BIT, pos, mtr) == FALSE);

	if (xdes_get_state(descr, mtr) != XDES_FSEG) {
		/* Current page is a fragment page: follow the list node
		stored in its page header backwards */
		block = buf_page_get(space, page_no, mtr);
		buf_page_x_lock(block, mtr);

		page_header = buf_block_get_frame(block)
						+ FSEG_PAGE_HEADER_OFFSET;
		prev_addr = flst_get_prev_addr(
				page_header + FSEG_PAGE_FRAG_NODE, mtr);

		return(prev_addr.page);
	}

	/* The extent of the current page belongs to the segment */
	bit = xdes_find_bit_downward(descr, XDES_FREE_BIT, FALSE,
				(page_no - 1) % FSP_EXTENT_SIZE, mtr);

	if ((bit != ULINT_UNDEFINED) && (bit < pos)) {

		return(xdes_get_offset(descr) + bit);
	}

	/* No lower address pages in this extent (a hit at or above the
	current position is not a predecessor) */
	descr = fseg_get_prev_extent(header, descr, mtr);

	return(fseg_extent_get_prev_page_no(header, descr, mtr));
}
#endif
/***********************************************************************
Validates a segment: checks the four lists of the segment header with
flst_validate and then walks each list, asserting with ut_a that every
node agrees with the list it is in and carries the id of this segment.
A failed check fires an assertion; the function itself can only return
TRUE. */
static
bool
fseg_validate_low(
/*==============*/
/* out: TRUE if ok */
fseg_header_t* header, /* in: segment header */
mtr_t* mtr2) /* in: mtr which must already hold a buffer fix on
the page of the header; used for reading the header,
while the list nodes are read in separate local
mini-transactions */
{
ulint space;
dulint seg_id;
mtr_t mtr;
xdes_t* descr;
fil_addr_t node_addr;
ulint n_used = 0;
ulint n_used2 = 0;
flst_node_t* node;
buf_frame_t* frame;
fseg_page_header_t* page_header;
ut_ad(mtr_memo_contains(mtr2, buf_block_align(header),
MTR_MEMO_BUF_FIX));
buf_page_x_lock(buf_block_align(header), mtr2);
space = buf_page_get_space(buf_block_align(header));
seg_id = mtr_read_dulint(header + FSEG_ID, MLOG_8BYTES, mtr2);
/* n_used is the used page count stored in the header; it is compared
at the end against n_used2, the sum computed from the FSEG_NOT_FULL
list */
n_used = mtr_read_ulint(header + FSEG_NOT_FULL_N_USED,
MLOG_4BYTES, mtr2);
flst_validate(header + FSEG_FRAG, mtr2);
flst_validate(header + FSEG_FREE, mtr2);
flst_validate(header + FSEG_NOT_FULL, mtr2);
flst_validate(header + FSEG_FULL, mtr2);
/* Validate FSEG_FREE list: every extent must have no used pages, be in
state XDES_FSEG and belong to this segment. Each node is visited in
its own mini-transaction, committed before moving to the next node. */
node_addr = flst_get_first(header + FSEG_FREE, mtr2);
while (!fil_addr_is_null(node_addr)) {
mtr_start(&mtr);
mtr_x_lock(&fsp_latch, &mtr);
descr = xdes_lst_get_descriptor(space, node_addr, &mtr);
ut_a(xdes_get_n_used(descr, &mtr) == 0);
ut_a(xdes_get_state(descr, &mtr) == XDES_FSEG);
ut_a(0 == ut_dulint_cmp(
mtr_read_dulint(descr + XDES_ID, MLOG_8BYTES,
&mtr), seg_id));
node_addr = flst_get_next_addr(descr + XDES_FLST_NODE, &mtr);
mtr_commit(&mtr);
}
/* Validate FSEG_NOT_FULL list: every extent must have at least one but
fewer than FSP_EXTENT_SIZE used pages; the used pages are summed into
n_used2 */
node_addr = flst_get_first(header + FSEG_NOT_FULL, mtr2);
while (!fil_addr_is_null(node_addr)) {
mtr_start(&mtr);
mtr_x_lock(&fsp_latch, &mtr);
descr = xdes_lst_get_descriptor(space, node_addr, &mtr);
ut_a(xdes_get_n_used(descr, &mtr) > 0);
ut_a(xdes_get_n_used(descr, &mtr) < FSP_EXTENT_SIZE);
ut_a(xdes_get_state(descr, &mtr) == XDES_FSEG);
ut_a(0 == ut_dulint_cmp(
mtr_read_dulint(descr + XDES_ID, MLOG_8BYTES,
&mtr), seg_id));
n_used2 += xdes_get_n_used(descr, &mtr);
node_addr = flst_get_next_addr(descr + XDES_FLST_NODE, &mtr);
mtr_commit(&mtr);
}
/* Validate FSEG_FULL list: every extent must have exactly
FSP_EXTENT_SIZE used pages */
node_addr = flst_get_first(header + FSEG_FULL, mtr2);
while (!fil_addr_is_null(node_addr)) {
mtr_start(&mtr);
mtr_x_lock(&fsp_latch, &mtr);
descr = xdes_lst_get_descriptor(space, node_addr, &mtr);
ut_a(xdes_get_n_used(descr, &mtr) == FSP_EXTENT_SIZE);
ut_a(xdes_get_state(descr, &mtr) == XDES_FSEG);
ut_a(0 == ut_dulint_cmp(
mtr_read_dulint(descr + XDES_ID, MLOG_8BYTES,
&mtr), seg_id));
node_addr = flst_get_next_addr(descr + XDES_FLST_NODE, &mtr);
mtr_commit(&mtr);
}
/* Validate FSEG_FRAG list: the nodes of this list live in the page
headers of the fragment pages themselves, so the segment id is read
from the page containing each node */
node_addr = flst_get_first(header + FSEG_FRAG, mtr2);
while (!fil_addr_is_null(node_addr)) {
mtr_start(&mtr);
mtr_x_lock(&fsp_latch, &mtr);
node = fut_get_ptr_x_lock(space, node_addr, &mtr);
frame = buf_frame_align(node);
page_header = frame + FSEG_PAGE_HEADER_OFFSET;
ut_a(0 == ut_dulint_cmp(
mtr_read_dulint(page_header + FSEG_PAGE_SEG_ID,
MLOG_8BYTES, &mtr), seg_id));
node_addr = flst_get_next_addr(node, &mtr);
mtr_commit(&mtr);
}
/* The stored count must equal the count summed from the
FSEG_NOT_FULL list */
ut_a(n_used == n_used2);
return(TRUE);
}
/***********************************************************************
Validates a segment. Calls mtr_x_lock on fsp_latch for the given
mini-transaction and then runs fseg_validate_low on the segment. */
bool
fseg_validate(
/*==========*/
				/* out: TRUE if ok */
	fseg_header_t*	header,	/* in: segment header */
	mtr_t*		mtr2)	/* in: mtr */
{
	mtr_x_lock(&fsp_latch, mtr2);

	return(fseg_validate_low(header, mtr2));
}
/***********************************************************************
Writes info of a segment to stdout as two lines: the segment id, space,
header page number and reserved/used page counts on the first, and the
lengths of the segment lists on the second. */
static
void
fseg_print_low(
/*===========*/
	fseg_header_t*	header,	/* in: segment header */
	mtr_t*		mtr)	/* in: mtr which must hold a buffer fix on
				the page of the header */
{
	buf_block_t*	block;
	dulint		seg_id;
	ulint		space;
	ulint		page_no;
	ulint		reserved;
	ulint		used;
	ulint		id_high;
	ulint		id_low;
	ulint		not_full_n_used;
	ulint		len_frag;
	ulint		len_free;
	ulint		len_not_full;
	ulint		len_full;

	block = buf_block_align(header);

	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_BUF_FIX));

	buf_page_x_lock(block, mtr);

	space = buf_page_get_space(block);
	page_no = buf_page_get_offset(block);

	reserved = fseg_n_reserved_pages_low(header, &used, mtr);

	/* The 8-byte segment id is printed as two words, high first */
	seg_id = mtr_read_dulint(header + FSEG_ID, MLOG_8BYTES, mtr);
	id_low = ut_dulint_get_low(seg_id);
	id_high = ut_dulint_get_high(seg_id);

	not_full_n_used = mtr_read_ulint(header + FSEG_NOT_FULL_N_USED,
							MLOG_4BYTES, mtr);

	len_frag = flst_get_len(header + FSEG_FRAG, mtr);
	len_free = flst_get_len(header + FSEG_FREE, mtr);
	len_not_full = flst_get_len(header + FSEG_NOT_FULL, mtr);
	len_full = flst_get_len(header + FSEG_FULL, mtr);

	printf(
	"SEGMENT id %lu %lu space %lu; page %lu; res %lu used %lu; full ext %lu\n",
		id_high, id_low, space, page_no, reserved, used,
		len_full);
	printf(
	"fragm pages %lu; free extents %lu; not full extents %lu: pages %lu\n",
		len_frag, len_free, len_not_full, not_full_n_used);
}
/***********************************************************************
Writes info of a segment. Calls mtr_x_lock on fsp_latch for the given
mini-transaction and then delegates the printing to fseg_print_low. */
void
fseg_print(
/*=======*/
fseg_header_t* header, /* in: segment header */
mtr_t* mtr) /* in: mtr */
{
mtr_x_lock(&fsp_latch, mtr);
fseg_print_low(header, mtr);
}
/***********************************************************************
Validates the file space system and its segments. The space header lists
are checked with flst_validate, every extent descriptor in the FSP_FREE,
FSP_FREE_FRAG and FSP_FULL_FRAG lists is checked against the list it is in,
every segment in the FSP_SEGS list is checked with fseg_validate_low, and
finally the collected counts are cross-checked against the space header.
A failed check fires a ut_a assertion; the function itself can only return
TRUE. */
bool
fsp_validate(
/*=========*/
/* out: TRUE if ok */
ulint space) /* in: space id */
{
fsp_header_t* header;
fseg_header_t* seg_header;
ulint size;
ulint free_limit;
ulint frag_n_used;
mtr_t mtr;
mtr_t mtr2;
xdes_t* descr;
fil_addr_t node_addr;
ulint descr_count = 0; /* number of extent descriptors met in all lists */
ulint n_used = 0; /* used pages summed over the FSP_FREE_FRAG list */
ulint n_used2 = 0; /* fragment pages summed over all segments */
ulint n_full_frag_pages;
/* Start first a mini-transaction mtr2 to lock out all other threads
from the fsp system; it is committed only at the very end, while mtr is
started and committed repeatedly for the individual reads */
mtr_start(&mtr2);
mtr_x_lock(&fsp_latch, &mtr2);
mtr_start(&mtr);
mtr_x_lock(&fsp_latch, &mtr);
header = fsp_get_space_header(space, &mtr);
size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, &mtr);
free_limit = mtr_read_ulint(header + FSP_FREE_LIMIT,
MLOG_4BYTES, &mtr);
frag_n_used = mtr_read_ulint(header + FSP_FRAG_N_USED,
MLOG_4BYTES, &mtr);
n_full_frag_pages = FSP_EXTENT_SIZE *
flst_get_len(header + FSP_FULL_FRAG, &mtr);
ut_a(free_limit <= size);
flst_validate(header + FSP_FREE, &mtr);
flst_validate(header + FSP_FREE_FRAG, &mtr);
flst_validate(header + FSP_FULL_FRAG, &mtr);
flst_validate(header + FSP_SEGS, &mtr);
mtr_commit(&mtr);
/* Validate FSP_FREE list: every extent must have no used pages and be
in state XDES_FREE */
mtr_start(&mtr);
mtr_x_lock(&fsp_latch, &mtr);
header = fsp_get_space_header(space, &mtr);
node_addr = flst_get_first(header + FSP_FREE, &mtr);
mtr_commit(&mtr);
while (!fil_addr_is_null(node_addr)) {
mtr_start(&mtr);
mtr_x_lock(&fsp_latch, &mtr);
descr_count++;
descr = xdes_lst_get_descriptor(space, node_addr, &mtr);
ut_a(xdes_get_n_used(descr, &mtr) == 0);
ut_a(xdes_get_state(descr, &mtr) == XDES_FREE);
node_addr = flst_get_next_addr(descr + XDES_FLST_NODE, &mtr);
mtr_commit(&mtr);
}
/* Validate FSP_FREE_FRAG list: every extent must have at least one but
fewer than FSP_EXTENT_SIZE used pages and be in state XDES_FREE_FRAG;
the used pages are summed into n_used */
mtr_start(&mtr);
mtr_x_lock(&fsp_latch, &mtr);
header = fsp_get_space_header(space, &mtr);
node_addr = flst_get_first(header + FSP_FREE_FRAG, &mtr);
mtr_commit(&mtr);
while (!fil_addr_is_null(node_addr)) {
mtr_start(&mtr);
mtr_x_lock(&fsp_latch, &mtr);
descr_count++;
descr = xdes_lst_get_descriptor(space, node_addr, &mtr);
ut_a(xdes_get_n_used(descr, &mtr) > 0);
ut_a(xdes_get_n_used(descr, &mtr) < FSP_EXTENT_SIZE);
ut_a(xdes_get_state(descr, &mtr) == XDES_FREE_FRAG);
n_used += xdes_get_n_used(descr, &mtr);
node_addr = flst_get_next_addr(descr + XDES_FLST_NODE, &mtr);
mtr_commit(&mtr);
}
/* Validate FSP_FULL_FRAG list: every extent must have exactly
FSP_EXTENT_SIZE used pages and be in state XDES_FULL_FRAG */
mtr_start(&mtr);
mtr_x_lock(&fsp_latch, &mtr);
header = fsp_get_space_header(space, &mtr);
node_addr = flst_get_first(header + FSP_FULL_FRAG, &mtr);
mtr_commit(&mtr);
while (!fil_addr_is_null(node_addr)) {
mtr_start(&mtr);
mtr_x_lock(&fsp_latch, &mtr);
descr_count++;
descr = xdes_lst_get_descriptor(space, node_addr, &mtr);
ut_a(xdes_get_n_used(descr, &mtr) == FSP_EXTENT_SIZE);
ut_a(xdes_get_state(descr, &mtr) == XDES_FULL_FRAG);
node_addr = flst_get_next_addr(descr + XDES_FLST_NODE, &mtr);
mtr_commit(&mtr);
}
/* Validate segments: each segment header is located from its node in
the FSP_SEGS list; the extents of the segment are added to descr_count
and its fragment pages to n_used2 */
mtr_start(&mtr);
mtr_x_lock(&fsp_latch, &mtr);
header = fsp_get_space_header(space, &mtr);
node_addr = flst_get_first(header + FSP_SEGS, &mtr);
mtr_commit(&mtr);
while (!fil_addr_is_null(node_addr)) {
mtr_start(&mtr);
mtr_x_lock(&fsp_latch, &mtr);
seg_header = fut_get_ptr_x_lock(space, node_addr,
&mtr) - FSEG_FLST_NODE;
fseg_validate_low(seg_header, &mtr);
descr_count += flst_get_len(seg_header + FSEG_FREE, &mtr);
descr_count += flst_get_len(seg_header + FSEG_FULL, &mtr);
descr_count += flst_get_len(seg_header + FSEG_NOT_FULL, &mtr);
n_used2 += flst_get_len(seg_header + FSEG_FRAG, &mtr);
node_addr = flst_get_next_addr(seg_header + FSEG_FLST_NODE,
&mtr);
mtr_commit(&mtr);
}
/* Cross-checks: the extents met in the lists must cover exactly the
pages below free_limit */
ut_a(descr_count * FSP_EXTENT_SIZE == free_limit);
/* The used pages in fragment extents must equal the fragment pages of
the segments plus free_limit / XDES_DESCRIBED_PER_PAGE rounded up
(presumably the extent descriptor pages themselves -- TODO confirm) */
ut_a(n_used + n_full_frag_pages
== n_used2 + (free_limit + XDES_DESCRIBED_PER_PAGE - 1)
/ XDES_DESCRIBED_PER_PAGE);
ut_a(frag_n_used == n_used);
mtr_commit(&mtr2);
return(TRUE);
}
/***********************************************************************
Prints info of a file space to stdout: the size, free limit and list
lengths read from the space header, followed by the info of every segment
in the FSP_SEGS list as printed by fseg_print_low. */
void
fsp_print(
/*======*/
ulint space) /* in: space id */
{
fsp_header_t* header;
fseg_header_t* seg_header;
ulint size;
ulint free_limit;
ulint frag_n_used;
mtr_t mtr;
mtr_t mtr2;
fil_addr_t node_addr;
ulint n_free;
ulint n_free_frag;
ulint n_full_frag;
ulint n_segs;
ulint seg_id_low;
ulint seg_id_high;
/* Start first a mini-transaction mtr2 to lock out all other threads
from the fsp system; it is committed only after all segments have been
printed */
mtr_start(&mtr2);
mtr_x_lock(&fsp_latch, &mtr2);
mtr_start(&mtr);
mtr_x_lock(&fsp_latch, &mtr);
header = fsp_get_space_header(space, &mtr);
size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, &mtr);
free_limit = mtr_read_ulint(header + FSP_FREE_LIMIT,
MLOG_4BYTES, &mtr);
frag_n_used = mtr_read_ulint(header + FSP_FRAG_N_USED,
MLOG_4BYTES, &mtr);
n_free = flst_get_len(header + FSP_FREE, &mtr);
n_free_frag = flst_get_len(header + FSP_FREE_FRAG, &mtr);
n_full_frag = flst_get_len(header + FSP_FULL_FRAG, &mtr);
n_segs = flst_get_len(header + FSP_SEGS, &mtr);
/* The 8-byte value at FSP_SEG_ID is printed as two words, high first */
seg_id_low = ut_dulint_get_low(mtr_read_dulint(header + FSP_SEG_ID,
MLOG_8BYTES, &mtr));
seg_id_high = ut_dulint_get_high(mtr_read_dulint(header + FSP_SEG_ID,
MLOG_8BYTES, &mtr));
printf("FILE SPACE INFO: id %lu\n", space);
printf("size %lu, free limit %lu, free extents %lu\n",
size, free_limit, n_free);
printf(
"not full frag extents %lu: used pages %lu, full frag extents %lu\n",
n_free_frag, frag_n_used, n_full_frag);
printf("number of segments %lu, first seg id not used %lu %lu\n",
n_segs, seg_id_high, seg_id_low);
/* Print segments: each one is handled in its own mini-transaction; the
segment header is located from its node in the FSP_SEGS list */
node_addr = flst_get_first(header + FSP_SEGS, &mtr);
mtr_commit(&mtr);
while (!fil_addr_is_null(node_addr)) {
mtr_start(&mtr);
mtr_x_lock(&fsp_latch, &mtr);
seg_header = fut_get_ptr_x_lock(space, node_addr,
&mtr) - FSEG_FLST_NODE;
fseg_print_low(seg_header, &mtr);
node_addr = flst_get_next_addr(seg_header + FSEG_FLST_NODE,
&mtr);
mtr_commit(&mtr);
}
mtr_commit(&mtr2);
}
......@@ -66,28 +66,6 @@ sess_open(
byte* addr_buf, /* in: client address */
ulint addr_len); /* in: client address length */
/*************************************************************************
Closes a session, freeing the memory occupied by it. */
void
sess_close(
/*=======*/
sess_t* sess); /* in, own: session object */
/*************************************************************************
Raises an SQL error. */
void
sess_raise_error_low(
/*=================*/
trx_t* trx, /* in: transaction */
ulint err_no, /* in: error number */
ulint type, /* in: more info of the error, or 0 */
dict_table_t* table, /* in: dictionary table or NULL */
dict_index_t* index, /* in: table index or NULL */
dtuple_t* tuple, /* in: tuple to insert or NULL */
rec_t* rec, /* in: record or NULL */
char* err_str);/* in: arbitrary null-terminated error string,
or NULL */
/*************************************************************************
Closes a session, freeing the memory occupied by it, if it is in a state
where it should be closed. */
......@@ -117,16 +95,6 @@ sess_srv_msg_send_simple(
ulint rel_kernel); /* in: SESS_RELEASE_KERNEL or
SESS_NOT_RELEASE_KERNEL */
/***************************************************************************
Processes a message from a client. NOTE: May release the kernel mutex
temporarily. */
void
sess_receive_msg_rel_kernel(
/*========================*/
sess_t* sess, /* in: session */
byte* str, /* in: message string */
ulint len); /* in: message length */
/***************************************************************************
When a command has been completed, this function sends the message about it
to the client. */
......@@ -136,17 +104,6 @@ sess_command_completed_message(
sess_t* sess, /* in: session */
byte* msg, /* in: message buffer */
ulint len); /* in: message data length */
/***********************************************************************
Starts a new connection and a session, or starts a query based on a client
message. This is called by a SRV_COM thread. */
void
sess_process_cli_msg(
/*=================*/
byte* str, /* in: message string */
ulint len, /* in: string length */
byte* addr, /* in: address string */
ulint alen); /* in: address length */
/* The session handle. All fields are protected by the kernel mutex */
......
......@@ -3063,11 +3063,6 @@ lock_deadlock_occurs(
ut_a(strlen(lock_latest_err_buf) < 4100);
/*
sess_raise_error_low(trx, DB_DEADLOCK, lock->type_mode, table,
index, NULL, NULL, NULL);
*/
return(TRUE);
}
......
/******************************************************
Recovery
(c) 1997 Innobase Oy
Created 9/20/1997 Heikki Tuuri
*******************************************************/
#include "log0recv.h"
#ifdef UNIV_NONINL
#include "log0recv.ic"
#endif
#include "mem0mem.h"
#include "buf0buf.h"
#include "buf0flu.h"
#include "srv0srv.h"
/* Size of block reads when the log groups are scanned forward to do
roll-forward */
#define RECV_SCAN_SIZE (4 * UNIV_PAGE_SIZE)
/* Size of block reads when the log groups are scanned backwards to synchronize
them */
#define RECV_BACK_SCAN_SIZE (4 * UNIV_PAGE_SIZE)
recv_sys_t* recv_sys = NULL;
recv_recover_page(block->frame, block->space, block->offset);
/************************************************************
Creates the recovery system: allocates the global recv_sys object, creates
its mutex and sets the hash table and heap pointers to NULL. Must be called
at most once; this is asserted. */
void
recv_sys_create(void)
/*=================*/
{
	ut_a(recv_sys == NULL);

	/* Size the allocation from the object recv_sys points to. The
	original code used sizeof(recv_t), which is a different type name
	from the recv_sys_t that recv_sys is declared to point to. */
	recv_sys = mem_alloc(sizeof(*recv_sys));

	mutex_create(&(recv_sys->mutex));

	recv_sys->hash = NULL;
	recv_sys->heap = NULL;
}
/************************************************************
Inits the recovery system for a recovery operation: creates the hash table
and the memory heap of recv_sys. The hash table is created with a size
argument of the current buffer pool size divided by 64. */
void
recv_sys_init(void)
/*===============*/
{
recv_sys->hash = hash_create(buf_pool_get_curr_size() / 64);
recv_sys->heap = mem_heap_create_in_buffer(256);
}
/************************************************************
Empties the recovery system: while holding the recv_sys mutex, frees the
hash table and the memory heap and resets both pointers to NULL. */
void
recv_sys_empty(void)
/*================*/
{
	mutex_enter(&(recv_sys->mutex));

	hash_free(recv_sys->hash);
	recv_sys->hash = NULL;

	mem_heap_free(recv_sys->heap);
	recv_sys->heap = NULL;

	mutex_exit(&(recv_sys->mutex));
}
/***********************************************************
For recovery purposes writes the contents of the log buffer to a log group
in order to synchronize log data. The caller must own the log mutex. */
static
void
recv_log_buf_flush(
/*===============*/
	log_group_t*	group,		/* in: log group */
	dulint		start_lsn,	/* in: start lsn of the log data in
					the log buffer; must be divisible by
					OS_FILE_LOG_BLOCK_SIZE */
	dulint		end_lsn)	/* in: end lsn of the log data in the
					log buffer; must be divisible by
					OS_FILE_LOG_BLOCK_SIZE */
{
	ut_ad(mutex_own(&(log_sys->mutex)));

	/* The number of bytes to write is the lsn distance between the
	two ends of the buffered data */
	log_group_write_buf(LOG_RECOVER, group, log_sys->buf,
				ut_dulint_minus(end_lsn, start_lsn),
				start_lsn, 0);
}
/***********************************************************
Compares two buffers containing log segments and determines the highest lsn
where they match, if any. The buffers are scanned backwards one log block
at a time, starting from the last block below end_lsn (end_lsn is first
clipped to recovered_lsn aligned up to a block boundary). */
static
dulint
recv_log_bufs_cmp(
/*==============*/
/* out: if no match found, ut_dulint_zero or
if start_lsn == LOG_START_LSN, returns
LOG_START_LSN; otherwise the highest matching
lsn */
byte* recv_buf, /* in: buffer containing valid log data */
byte* buf, /* in: buffer of data from a possibly
incompletely written log group */
dulint start_lsn, /* in: buffer start lsn, must be divisible
by OS_FILE_LOG_BLOCK_SIZE and must be >=
LOG_START_LSN */
dulint end_lsn, /* in: buffer end lsn, must be divisible
by OS_FILE_LOG_BLOCK_SIZE */
dulint recovered_lsn) /* in: recovery succeeded up to this lsn */
{
ulint len;
ulint offset;
byte* log_block1;
byte* log_block2;
ulint no;
ulint data_len;
ut_ad(ut_dulint_cmp(start_lsn, LOG_START_LSN) >= 0);
/* Do not compare anything above the block containing recovered_lsn */
if (ut_dulint_cmp(end_lsn, recovered_lsn) > 0) {
end_lsn = ut_dulint_align_up(recovered_lsn,
OS_FILE_LOG_BLOCK_SIZE);
}
len = ut_dulint_minus(end_lsn, start_lsn);
if (len == 0) {
goto no_match;
}
ut_ad(len % OS_FILE_LOG_BLOCK_SIZE == 0);
/* Both cursors start one past the last block and are stepped down by
one block at the top of each iteration */
log_block1 = recv_buf + len;
log_block2 = buf + len;
for (;;) {
log_block1 -= OS_FILE_LOG_BLOCK_SIZE;
log_block2 -= OS_FILE_LOG_BLOCK_SIZE;
/* The block in the valid buffer must carry the same block number in
its header and trailer; this is asserted */
no = log_block_get_hdr_no(log_block1);
ut_a(no == log_block_get_trl_no(log_block1));
if ((no == log_block_get_hdr_no(log_block2))
&& (no == log_block_get_trl_no(log_block2))) {
/* Match found if the block is not corrupted */
data_len = log_block_get_data_len(log_block2);
/* NOTE(review): if data_len could be smaller than LOG_BLOCK_DATA,
the length passed below would wrap around -- confirm that
log_block_get_data_len never returns such a value */
if (0 == ut_memcmp(log_block1 + LOG_BLOCK_DATA,
log_block2 + LOG_BLOCK_DATA,
data_len - LOG_BLOCK_DATA)) {
/* Match found: the returned lsn is start_lsn plus the offset of
the block in buf plus the data length of the block */
return(ut_dulint_add(start_lsn,
log_block2 - buf + data_len));
}
}
if (log_block1 == recv_buf) {
/* No match found */
break;
}
}
no_match:
/* At the very start of the log there is nothing below to compare, so
LOG_START_LSN itself is reported */
if (ut_dulint_cmp(start_lsn, LOG_START_LSN) == 0) {
return(LOG_START_LSN);
}
return(ut_dulint_zero);
}
/************************************************************
Copies a log segment from the most up-to-date log group to the other log
group, so that it contains the latest log data. The log is scanned
backwards from recovered_lsn in pieces of RECV_BACK_SCAN_SIZE bytes: each
piece is read from both groups and compared with recv_log_bufs_cmp, and a
piece is written to the target group if the match does not reach
recovered_lsn. The scan stops at the first piece for which a nonzero match
lsn is reported. The caller must own the log mutex. */
static
void
recv_copy_group(
/*============*/
	log_group_t*	up_to_date_group,	/* in: the most up-to-date
						log group */
	log_group_t*	group,			/* in: copy to this log group */
	dulint_lsn	recovered_lsn)		/* in: recovery succeeded up
						to this lsn;
						NOTE(review): the type name
						dulint_lsn is kept as found,
						the value is used as a
						dulint -- confirm the type */
{
	dulint	start_lsn;
	dulint	end_lsn;
	dulint	match;
	byte*	buf;
	byte*	buf1;

	ut_ad(mutex_own(&(log_sys->mutex)));

	if (0 == ut_dulint_cmp(LOG_START_LSN, recovered_lsn)) {
		/* Nothing has been recovered: nothing to copy */

		return;
	}

	ut_ad(RECV_BACK_SCAN_SIZE <= log_sys->buf_size);

	/* Allocate twice the scan size so that an aligned scan buffer fits
	inside the allocation. The aligned pointer must be derived from the
	allocation buf1; the original code aligned the uninitialized buf and
	also contained a stray ");". */
	buf1 = mem_alloc(2 * RECV_BACK_SCAN_SIZE);
	buf = ut_align(buf1, RECV_BACK_SCAN_SIZE);

	end_lsn = ut_dulint_align_up(recovered_lsn, RECV_BACK_SCAN_SIZE);

	match = ut_dulint_zero;

	for (;;) {
		/* Step one scan window downwards, but never below
		LOG_START_LSN */
		if (ut_dulint_cmp(ut_dulint_add(LOG_START_LSN,
					RECV_BACK_SCAN_SIZE), end_lsn) >= 0) {
			start_lsn = LOG_START_LSN;
		} else {
			start_lsn = ut_dulint_subtract(end_lsn,
							RECV_BACK_SCAN_SIZE);
		}

		log_group_read_log_seg(LOG_RECOVER, buf, group, start_lsn,
								end_lsn);
		log_group_read_log_seg(LOG_RECOVER, log_sys->buf,
					up_to_date_group, start_lsn, end_lsn);

		match = recv_log_bufs_cmp(log_sys->buf, buf, start_lsn,
						end_lsn, recovered_lsn);

		if (ut_dulint_cmp(match, recovered_lsn) != 0) {
			/* The target group differs somewhere in this
			window: overwrite the window with the valid data */
			recv_log_buf_flush(group, start_lsn, end_lsn);
		}

		/* ut_dulint_zero is a value, not a function: test for a
		nonzero match by comparing against it */
		if (ut_dulint_cmp(match, ut_dulint_zero) != 0) {
			mem_free(buf1);

			return;
		}

		end_lsn = start_lsn;
	}
}
/************************************************************
Copies a log segment from the most up-to-date log group to the other log
groups, so that they all contain the latest log data. Also writes the info
about the latest checkpoint to the groups, and inits the fields in the group
memory structs to up-to-date values. The caller must own the log mutex; it
is released temporarily while waiting for a checkpoint write. */
void
recv_synchronize_groups(
/*====================*/
	log_group_t*	up_to_date_group,	/* in: the most up-to-date
						log group */
	dulint_lsn	recovered_lsn,		/* in: recovery succeeded up
						to this lsn */
	log_group_t*	max_checkpoint_group)	/* in: the group with the most
						recent checkpoint info */
{
	log_group_t*	group;

	ut_ad(mutex_own(&(log_sys->mutex)));

	group = UT_LIST_GET_FIRST(log_sys->log_groups);

	while (group) {
		if (group != up_to_date_group) {
			/* Copy log data. recv_copy_group takes the source
			(up-to-date) group first and the target group
			second; the original call had them swapped, which
			would have copied stale data over the good group. */
			recv_copy_group(up_to_date_group, group,
							recovered_lsn);
		}

		if (group != max_checkpoint_group) {
			/* Copy the checkpoint info to the group */
			log_group_checkpoint(group);

			mutex_exit(&(log_sys->mutex));

			/* Wait for the checkpoint write to complete:
			acquiring and immediately releasing the s-lock
			blocks until the lock is no longer held by the
			writer */
			rw_lock_s_lock(&(log_sys->checkpoint_lock));
			rw_lock_s_unlock(&(log_sys->checkpoint_lock));

			mutex_enter(&(log_sys->mutex));
		}

		/* Update the fields in the group struct to correspond to
		recovered_lsn */
		log_group_set_fields(group, recovered_lsn);

		group = UT_LIST_GET_NEXT(log_groups, group);
	}
}
/************************************************************
Looks for the maximum consistent checkpoint from the log groups. */
static
ulint
recv_find_max_checkpoint(
/*=====================*/
/* out: error code or DB_SUCCESS */
log_group_t** max_group, /* out: max group */
ulint* max_field) /* out: LOG_CHECKPOINT_1 or
LOG_CHECKPOINT_2 */
{
log_group_t* group;
dulint max_no;
dulint cp_no;
ulint field;
ulint fold;
byte* buf;
ut_ad(mutex_own(&(log_sys->mutex)));
/* Look for the latest checkpoint from the log groups */
group = UT_LIST_GET_FIRST(log_sys->log_groups);
checkpoint_no = ut_dulint_zero;
checkpoint_lsn = ut_dulint_zero;
*max_group = NULL;
buf = log_sys->checkpoint_buf;
while (group) {
group->state = LOG_GROUP_CORRUPTED;
for (field = LOG_CHECKPOINT_1; field <= LOG_CHECKPOINT_2;
field += LOG_CHECKPOINT_2 - LOG_CHECKPOINT_1) {
log_group_read_checkpoint_info(group, field);
/* Check the consistency of the checkpoint info */
fold = ut_fold_binary(buf, LOG_CHECKPOINT_CHECKSUM_1);
if (fold != mach_read_from_4(buf
+ LOG_CHECKPOINT_CHECKSUM_1)) {
goto not_consistent;
}
fold = ut_fold_binary(buf + LOG_CHECKPOINT_LSN,
LOG_CHECKPOINT_CHECKSUM_2
- LOG_CHECKPOINT_LSN);
if (fold != mach_read_from_4(buf
+ LOG_CHECKPOINT_CHECKSUM_2)) {
goto not_consistent;
}
group->state = LOG_GROUP_OK;
group->lsn = mach_read_from_8(buf
+ LOG_CHECKPOINT_LSN);
group->lsn_offset = mach_read_from_4(buf
+ LOG_CHECKPOINT_OFFSET);
group->lsn_file_count = mach_read_from_4(
buf + LOG_CHECKPOINT_FILE_COUNT);
cp_no = mach_read_from_8(buf + LOG_CHECKPOINT_NO);
if (ut_dulint_cmp(cp_no, max_no) >= 0) {
*max_group = group;
*max_field = field;
max_no = cp_no;
}
not_consistent:
}
group = UT_LIST_GET_NEXT(log_groups, group);
}
if (*max_group == NULL) {
return(DB_ERROR);
}
return(DB_SUCCESS);
}
/***********************************************************
Parses log records from a buffer and stores them to a hash table to wait
merging to file pages. If the hash table becomes too big, merges automatically
it to file pages.
NOTE(review): the body is empty, so none of the above is implemented and the
function reaches its end without returning a value although it is declared
to return bool. Also, the call in recv_recovery_from_checkpoint_start passes
six arguments while five parameters are declared here -- the intended
signature needs to be confirmed before this is completed. */
static
bool
recv_parse_and_hash_log_recs(
/*=========================*/
/* out: TRUE if limit_lsn has been reached */
byte* buf, /* in: buffer containing a log segment or
garbage */
ulint len, /* in: buffer length */
dulint start_lsn, /* in: buffer start lsn */
dulint limit_lsn, /* in: recover at least to this lsn */
dulint* recovered_lsn) /* out: was able to parse up to this lsn */
{
}
/************************************************************
Recovers from a checkpoint. When this function returns, the database is able
to start processing new user transactions, but the function
recv_recovery_from_checkpoint_finish should be called later to complete
the recovery and free the resources used in it. The log mutex is acquired
at the start and released on every return path. */
ulint
recv_recovery_from_checkpoint_start(
/*================================*/
				/* out: DB_SUCCESS; the error code of
				recv_find_max_checkpoint if no consistent
				checkpoint is found; DB_ERROR if limit_lsn
				is below the checkpoint lsn */
	dulint	limit_lsn)	/* in: recover up to this lsn if possible */
{
	log_group_t*	group;
	log_group_t*	max_cp_group;
	log_group_t*	up_to_date_group;
	ulint		max_cp_field;
	byte*		buf;
	ulint		err;
	dulint		checkpoint_lsn;
	dulint		checkpoint_no;
	dulint		recovered_lsn;
	dulint		old_lsn;
	dulint		end_lsn;
	dulint		start_lsn;
	bool		finished;
	dulint		flush_start_lsn;

	mutex_enter(&(log_sys->mutex));

	/* Look for the latest checkpoint from any of the log groups */
	err = recv_find_max_checkpoint(&max_cp_group, &max_cp_field);

	if (err != DB_SUCCESS) {
		mutex_exit(&(log_sys->mutex));

		return(err);
	}

	log_group_read_checkpoint_info(max_cp_group, max_cp_field);

	buf = log_sys->checkpoint_buf;

	checkpoint_lsn = mach_read_from_8(buf + LOG_CHECKPOINT_LSN);
	checkpoint_no = mach_read_from_8(buf + LOG_CHECKPOINT_NO);

	if (ut_dulint_cmp(limit_lsn, checkpoint_lsn) < 0) {
		/* Cannot recover to a point before the checkpoint */
		mutex_exit(&(log_sys->mutex));

		return(DB_ERROR);
	}

	/* Start reading the log groups from the checkpoint lsn up. The
	variable flush_start_lsn tells a lsn up to which the log is known
	to be contiguously written in all log groups. */
	recovered_lsn = checkpoint_lsn;
	flush_start_lsn = ut_dulint_align_down(checkpoint_lsn,
						OS_FILE_LOG_BLOCK_SIZE);
	up_to_date_group = max_cp_group;

	ut_ad(RECV_SCAN_SIZE <= log_sys->buf_size);

	group = UT_LIST_GET_FIRST(log_sys->log_groups);

	while (group) {
		/* A corrupted group is not scanned at all */
		finished = FALSE;

		if (group->state == LOG_GROUP_CORRUPTED) {
			finished = TRUE;
		}

		start_lsn = flush_start_lsn;

		while (!finished) {
			end_lsn = ut_dulint_add(start_lsn, RECV_SCAN_SIZE);

			log_group_read_log_seg(LOG_RECOVER, log_sys->buf,
						group, start_lsn, end_lsn);
			old_lsn = recovered_lsn;

			/* NOTE(review): six arguments are passed here, but
			the definition of recv_parse_and_hash_log_recs in
			this file declares five parameters (there is no
			counterpart for &flush_start_lsn) -- the intended
			signature must be settled before this compiles */
			finished = recv_parse_and_hash_log_recs(log_sys->buf,
						RECV_SCAN_SIZE, start_lsn,
						limit_lsn, &flush_start_lsn,
						&recovered_lsn);

			if (ut_dulint_cmp(recovered_lsn, old_lsn) > 0) {
				/* We found a more up-to-date group */
				up_to_date_group = group;
			}

			start_lsn = end_lsn;
		}

		group = UT_LIST_GET_NEXT(log_groups, group);
	}

	/* Delete possible corrupted or extra log records from all log
	groups */
	recv_truncate_groups(recovered_lsn);

	/* Synchronize the uncorrupted log groups to the most up-to-date log
	group; we may also have to copy checkpoint info to groups */
	log_sys->next_checkpoint_lsn = checkpoint_lsn;
	log_sys->next_checkpoint_no = checkpoint_no;

	recv_synchronize_groups(up_to_date_group, recovered_lsn,
							max_cp_group);

	log_sys->next_checkpoint_no = ut_dulint_add(checkpoint_no, 1);

	/* Release the log mutex as the error paths above do; without this
	the mutex acquired at the top would still be held on return */
	mutex_exit(&(log_sys->mutex));

	/* The database is now ready to start almost normal processing of user
	transactions */

	return(DB_SUCCESS);
}
/************************************************************
Completes recovery from a checkpoint by performing three steps in order:
rolls back the transactions that have no user session, merges the hashed
log records, and empties the recovery system with recv_sys_empty. */
void
recv_recovery_from_checkpoint_finish(void)
/*======================================*/
{
/* Rollback the uncommitted transactions which have no user session */
trx_rollback_all_without_sess();
/* Merge the hashed log records */
recv_merge_hashed_log_recs();
/* Free the resources of the recovery system */
recv_sys_empty();
}
/****************************************************************
Writes to the log a record about incrementing the row id counter. The
one-byte record is written directly into the log buffer under the log mutex
when it fits into the current log block; otherwise the mutex is released and
the write is delegated to log_write_row_id_incr_rec_slow. */
UNIV_INLINE
void
log_write_row_id_incr_rec(void)
/*===========================*/
{
log_t* log = log_sys;
ulint data_len;
mutex_enter(&(log->mutex));
/* Data length the current log block would have after one more byte */
data_len = (log->buf_free % OS_FILE_LOG_BLOCK_SIZE) + 1;
if (data_len >= OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE) {
/* The string does not fit within the current log block
or the block would become full */
mutex_exit(&(log->mutex));
log_write_row_id_incr_rec_slow();
return;
}
/* Write the single type byte at the first free position and record the
new data length in the log block that contains that position */
*(log->buf + log->buf_free) = MLOG_INCR_ROW_ID | MLOG_SINGLE_REC_FLAG;
log_block_set_data_len(ut_align_down(log->buf + log->buf_free,
OS_FILE_LOG_BLOCK_SIZE),
data_len);
#ifdef UNIV_LOG_DEBUG
log->old_buf_free = log->buf_free;
log->old_lsn = log->lsn;
log_check_log_recs(log->buf + log->buf_free, 1, log->lsn);
#endif
/* Advance the free position and the lsn by the one byte written */
log->buf_free++;
ut_ad(log->buf_free <= log->buf_size);
UT_DULINT_INC(log->lsn);
mutex_exit(&(log->mutex));
}
/****************************************************************
Writes to the log a record about incrementing the row id counter, using
the general reserve-and-open / write / close / release sequence of the
log system. */
static
void
log_write_row_id_incr_rec_slow(void)
/*================================*/
{
	/* The whole record is a single type byte */
	byte	rec_type = MLOG_INCR_ROW_ID | MLOG_SINGLE_REC_FLAG;

	log_reserve_and_open(1);

	log_write_low(&rec_type, 1);

	log_close();

	log_release();
}
/**************************************************************************
Parses a log record which carries a row id as a compressed dulint and, if
a page is given, stores the value to the row id field of the dictionary
header on that page. */
byte*
dict_hdr_parse_set_row_id(
/*======================*/
			/* out: end of log record or NULL */
	byte*	ptr,	/* in: buffer */
	byte*	end_ptr,/* in: buffer end */
	page_t*	page)	/* in: page or NULL */
{
	dulint	row_id;

	ptr = mach_dulint_parse_compressed(ptr, end_ptr, &row_id);

	/* Apply the record only if it was parsed completely and there is
	a page to apply it to */

	if (ptr != NULL && page != NULL) {
		mach_write_to_8(page + DICT_HDR + DICT_HDR_ROW_ID, row_id);
	}

	return(ptr);
}
/******************************************************
The interface to the operating system file i/o primitives
(c) 1995 Innobase Oy
Created 10/21/1995 Heikki Tuuri
*******************************************************/
#include "os0file.h"
#include "os0sync.h"
#include "ut0mem.h"
#ifndef __WIN__
#include <errno.h>
#endif
/* We use these mutexes to protect lseek + file i/o operation, if the
OS does not provide an atomic pread or pwrite, or similar. A file handle
is mapped to one of the mutexes by taking the handle value modulo
OS_FILE_N_SEEK_MUTEXES. The mutexes are created in os_aio_init. */
#define OS_FILE_N_SEEK_MUTEXES 16
os_mutex_t os_file_seek_mutexes[OS_FILE_N_SEEK_MUTEXES];
/* In simulated aio, merge at most this many consecutive i/os */
#define OS_AIO_MERGE_N_CONSECUTIVE 32
/* If this flag is TRUE, then we will use the native aio of the
OS (provided we compiled Innobase with it in), otherwise we will
use simulated aio we build below with threads. os_aio_init forces the
flag to FALSE if neither WIN_ASYNC_IO nor POSIX_ASYNC_IO is defined. */
bool os_aio_use_native_aio = FALSE;
/* The aio array slot structure: describes one aio request placed in an
aio array. The request fields are filled in by os_aio_array_reserve_slot. */
typedef struct os_aio_slot_struct os_aio_slot_t;
struct os_aio_slot_struct{
bool is_read; /* TRUE if a read operation */
ulint pos; /* index of the slot in the aio
array */
bool reserved; /* TRUE if this slot is reserved */
ulint len; /* length of the block to read or
write */
byte* buf; /* buffer used in i/o */
ulint type; /* OS_FILE_READ or OS_FILE_WRITE */
ulint offset; /* 32 low bits of file offset in
bytes */
ulint offset_high; /* 32 high bits of file offset */
os_file_t file; /* file where to read or write */
char* name; /* file name or path */
bool io_already_done;/* used only in simulated aio:
TRUE if the physical i/o already
made and only the slot message
needs to be passed to the caller
of os_aio_simulated_handle */
void* message1; /* first message given by the
requester of an aio operation */
void* message2; /* second message given by the
requester of an aio operation; the
two messages can be used to identify
which pending aio operation was
completed */
#ifdef WIN_ASYNC_IO
OVERLAPPED control; /* Windows control block for the
aio request */
#elif defined(POSIX_ASYNC_IO)
struct aiocb control; /* Posix control block for aio
request */
#endif
};
/* The aio array structure: a fixed set of slots for aio requests, divided
into equally sized segments. Created by os_aio_array_create. */
typedef struct os_aio_array_struct os_aio_array_t;
struct os_aio_array_struct{
os_mutex_t mutex; /* the mutex protecting the aio array */
os_event_t not_full; /* The event on which a thread waits when
all the slots are reserved: it is reset when
the last free slot gets reserved and set when
a slot is freed in a full array */
ulint n_slots; /* Total number of slots in the aio array.
This must be divisible by n_segments. */
ulint n_segments;/* Number of segments in the aio array of
pending aio requests. A thread can wait
separately for any one of the segments. */
ulint n_reserved;/* Number of currently reserved slots in
the aio array */
os_aio_slot_t* slots; /* Pointer to the slots in the array */
os_event_t* events; /* Pointer to an array of event handles
where we copied the handles from slots,
in the same order. This can be used in
WaitForMultipleObjects; used only in
Windows */
};
/* Array of events used in simulated aio: one event for each global
segment. The array is allocated in os_aio_init and an event is set by
os_aio_simulated_wake_handler_thread. */
os_event_t* os_aio_segment_wait_events = NULL;
/* The aio arrays for non-ibuf reads, non-ibuf writes, ibuf i/o and log
i/o, as well as sync aio. These are NULL when the module has not yet been
initialized. */
os_aio_array_t* os_aio_read_array = NULL;
os_aio_array_t* os_aio_write_array = NULL;
os_aio_array_t* os_aio_ibuf_array = NULL;
os_aio_array_t* os_aio_log_array = NULL;
os_aio_array_t* os_aio_sync_array = NULL;
/* Combined number of segments in the ibuf, log, read and write arrays
(the sync array is not counted); set in os_aio_init */
ulint os_aio_n_segments = ULINT_UNDEFINED;
/***************************************************************************
Maps the latest OS error of a file io function to an Innobase error number.
Call this before any other OS call, because those may overwrite the OS error
number. An OS error number which this function does not recognize is
returned incremented by 100. In the non-Windows build the errno value and
the perror text are also printed. */
ulint
os_file_get_last_error(void)
/*========================*/
		/* out: error number, or OS error number + 100 */
{
	ulint	err;

#ifdef __WIN__
	err = (ulint) GetLastError();

	switch (err) {
	case ERROR_FILE_NOT_FOUND:
		return(OS_FILE_NOT_FOUND);
	case ERROR_DISK_FULL:
		return(OS_FILE_DISK_FULL);
	case ERROR_FILE_EXISTS:
		return(OS_FILE_ALREADY_EXISTS);
	default:
		return(100 + err);
	}
#else
	err = (ulint) errno;

	printf("%lu\n", err);
	perror("os0file:");

	switch (err) {
	case ENOSPC:
		return(OS_FILE_DISK_FULL);
#ifdef POSIX_ASYNC_IO
	case EAGAIN:
		return(OS_FILE_AIO_RESOURCES_RESERVED);
#endif
	case ENOENT:
		return(OS_FILE_NOT_FOUND);
	case EEXIST:
		return(OS_FILE_ALREADY_EXISTS);
	default:
		return(100 + err);
	}
#endif
}
/********************************************************************
Handles the failure of a file operation. If the disk is full, the user is
asked on the console whether the operation should be retried ('Y') or the
database crashed ('N'). If aio resources were reserved, a retry is
requested at once. Any other error stops the database with ut_error. */
static
bool
os_file_handle_error(
/*=================*/
				/* out: TRUE if we should retry the
				operation */
	os_file_t	file,	/* in: file pointer */
	char*		name)	/* in: name of a file or NULL */
{
	ulint	err	= os_file_get_last_error();
	int	answer;

	if (err != OS_FILE_DISK_FULL) {
		if (err == OS_FILE_AIO_RESOURCES_RESERVED) {

			return(TRUE);
		}

		ut_error;

		return(FALSE);
	}

	/* The disk is full: keep asking until we get 'Y' or 'N' */

	for (;;) {
		printf("\n");

		if (name) {
			printf(
		"Innobase encountered a problem with file %s.\n",
				name);
		}

		printf("Disk is full. Try to clean the disk to free space\n");
		printf("before answering the following: How to continue?\n");
		printf("(Y == freed some space: try again)\n");
		printf("(N == crash the database: will restart it)?\n");

		/* Skip newlines without repeating the question */
		do {
			answer = getchar();
		} while (answer == (int) '\n');

		if (answer == (int) 'N') {
			ut_error;

			return(FALSE);
		}

		if (answer == (int) 'Y') {

			return(TRUE);
		}

		/* Any other character: print the question again */
	}
}
/********************************************************************
Opens an existing file or creates a new. If creating fails because the
disk is full, os_file_handle_error is consulted and the operation is
retried if it says so. */
os_file_t
os_file_create(
/*===========*/
			/* out, own: handle to the file, not defined if error,
			error number can be retrieved with
			os_file_get_last_error */
	char*	name,	/* in: name of the file or path as a null-terminated
			string */
	ulint	create_mode, /* in: OS_FILE_OPEN if an existing file is opened
			(if does not exist, error), or OS_FILE_CREATE if a new
			file is created (if exists, error), OS_FILE_OVERWRITE
			if a new is created or an old overwritten */
	ulint	purpose,/* in: OS_FILE_AIO, if asynchronous, non-buffered i/o
			is desired, OS_FILE_NORMAL, if any normal file;
			not used in the non-Windows build */
	bool*	success)/* out: TRUE if succeed, FALSE if error */
{
#ifdef __WIN__
	os_file_t	file;
	DWORD		create_flag;
	DWORD		attributes;
	bool		retry;

try_again:
	ut_a(name);

	if (create_mode == OS_FILE_OPEN) {
		create_flag = OPEN_EXISTING;
	} else if (create_mode == OS_FILE_CREATE) {
		create_flag = CREATE_NEW;
	} else if (create_mode == OS_FILE_OVERWRITE) {
		create_flag = CREATE_ALWAYS;
	} else {
		create_flag = 0;
		ut_error;
	}

	if (purpose == OS_FILE_AIO) {
		/* use asynchronous (overlapped) io and no buffering
		of writes in the OS */
		attributes = 0;
#ifdef WIN_ASYNC_IO
		if (os_aio_use_native_aio) {
			attributes = attributes | FILE_FLAG_OVERLAPPED;
		}
#endif
#ifdef UNIV_NON_BUFFERED_IO
		attributes = attributes | FILE_FLAG_NO_BUFFERING;
#endif
	} else if (purpose == OS_FILE_NORMAL) {
		attributes = 0
#ifdef UNIV_NON_BUFFERED_IO
			| FILE_FLAG_NO_BUFFERING
#endif
			;
	} else {
		attributes = 0;
		ut_error;
	}

	file = CreateFile(name,
			GENERIC_READ | GENERIC_WRITE, /* read and write
							access */
			FILE_SHARE_READ,/* file can be read by other
					processes */
			NULL,	/* default security attributes */
			create_flag,
			attributes,
			NULL);	/* no template file */

	if (file == INVALID_HANDLE_VALUE) {
		*success = FALSE;

		if (create_mode != OS_FILE_OPEN
		    && os_file_get_last_error() == OS_FILE_DISK_FULL) {

			retry = os_file_handle_error(file, name);

			if (retry) {
				goto try_again;
			}
		}
	} else {
		*success = TRUE;
	}

	return(file);
#else
	os_file_t	file;
	int		create_flag;
	int		open_errno;
	bool		retry;

try_again:
	ut_a(name);

	if (create_mode == OS_FILE_OPEN) {
		create_flag = O_RDWR;
	} else if (create_mode == OS_FILE_CREATE) {
		create_flag = O_RDWR | O_CREAT | O_EXCL;
	} else if (create_mode == OS_FILE_OVERWRITE) {
		create_flag = O_RDWR | O_CREAT | O_TRUNC;
	} else {
		create_flag = 0;
		ut_error;
	}

	UT_NOT_USED(purpose);

	if (create_flag & O_CREAT) {
		/* open() requires the mode argument whenever O_CREAT is
		given: this concerns both OS_FILE_CREATE and
		OS_FILE_OVERWRITE */

		file = open(name, create_flag, S_IRWXU | S_IRWXG | S_IRWXO);
	} else {
		file = open(name, create_flag);
	}

	if (file == -1) {
		/* Save errno at once: the printing below may change it */
		open_errno = errno;

		*success = FALSE;

		printf("Error in opening file %s, errno %lu\n", name,
							(ulint)open_errno);
		errno = open_errno;
		perror("os0file.c:");

		if (create_mode != OS_FILE_OPEN
		    && open_errno == ENOSPC) {

			/* os_file_handle_error reads errno through
			os_file_get_last_error: restore the value set by
			open() */
			errno = open_errno;

			retry = os_file_handle_error(file, name);

			if (retry) {
				goto try_again;
			}
		}
	} else {
		*success = TRUE;
	}

	return(file);
#endif
}
/***************************************************************************
Closes a file handle. If closing fails, the error number can be retrieved
with os_file_get_last_error. */
bool
os_file_close(
/*==========*/
				/* out: TRUE if success */
	os_file_t	file)	/* in, own: handle to a file */
{
#ifdef __WIN__
	ut_a(file);

	return(CloseHandle(file) ? TRUE : FALSE);
#else
	return((close(file) == -1) ? FALSE : TRUE);
#endif
}
/***************************************************************************
Gets a file size. In the non-Windows build the size is found by seeking to
the end of the file, which moves the file position of the handle; there the
whole value returned by lseek is stored to *size and *size_high is always
set to 0. */
bool
os_file_get_size(
/*=============*/
				/* out: TRUE if success; if FALSE, the
				output parameters are not set */
	os_file_t	file,	/* in: handle to a file */
	ulint*		size,	/* out: least significant 32 bits of file
				size */
	ulint*		size_high)/* out: most significant 32 bits of size */
{
#ifdef __WIN__
	DWORD	high;
	DWORD	low;

	low = GetFileSize(file, &high);

	/* 0xFFFFFFFF is also a valid low word of a size: it means an error
	only if the last error is set, too */

	if ((low == 0xFFFFFFFF) && (GetLastError() != NO_ERROR)) {

		return(FALSE);
	}

	*size = low;
	*size_high = high;

	return(TRUE);
#else
	off_t	end_pos;

	end_pos = lseek(file, 0, SEEK_END);

	if (end_pos == (off_t) -1) {
		/* The seek failed: do not report a garbage size as a
		success */

		return(FALSE);
	}

	*size = (ulint) end_pos;
	*size_high = 0;

	return(TRUE);
#endif
}
/***************************************************************************
Sets a file size. In Windows the file pointer is moved to the requested
size and the end of file is set there; elsewhere zeros are written from
offset 0 up to the requested size. In both cases the file is flushed
afterwards. If an operation fails, os_file_handle_error decides whether
to retry; in the non-Windows build a retry continues from the offset where
the failed write started. */
bool
os_file_set_size(
/*=============*/
				/* out: TRUE if success */
	char*		name,	/* in: name of the file or path as a
				null-terminated string */
	os_file_t	file,	/* in: handle to a file */
	ulint		size,	/* in: least significant 32 bits of file
				size */
	ulint		size_high)/* in: most significant 32 bits of size */
{
#ifdef __WIN__
	DWORD	high;
	DWORD	low;
	DWORD	ret;
	BOOL	ret2;
	DWORD	err;
	bool	retry;

try_again:
	low = size;
	high = size_high;

	ret = SetFilePointer(file, low, &high, FILE_BEGIN);

	if (ret == 0xFFFFFFFF && GetLastError() != NO_ERROR) {
		err = GetLastError();

		goto error_handling;
	}

	ret2 = SetEndOfFile(file);

	if (ret2) {
		ret2 = os_file_flush(file);
	}

	if (ret2) {
		return(TRUE);
	}
#else
	ulint	offset;
	ulint	n_bytes;
	ulint	low;
	ssize_t	ret;
	bool	retry;
	ulint	i;
	byte	buf[UNIV_PAGE_SIZE * 8];

	/* Write buffer full of zeros */
	for (i = 0; i < UNIV_PAGE_SIZE * 8; i++) {
		buf[i] = '\0';
	}

	/* Start writing from the beginning of the file. The offset is
	deliberately set before the retry label: a retry then continues
	from where the failed write started. */
	offset = 0;

try_again:
	low = size;
#if (UNIV_WORD_SIZE == 8)
	low = low + (size_high << 32);
#endif
	while (offset < low) {
		if (low - offset < UNIV_PAGE_SIZE * 8) {
			n_bytes = low - offset;
		} else {
			n_bytes = UNIV_PAGE_SIZE * 8;
		}

		ret = pwrite(file, buf, n_bytes, (off_t) offset);

		if (ret < 0 || (ulint) ret != n_bytes) {
			/* An error or a short write */

			goto error_handling;
		}

		offset += n_bytes;
	}

	if (os_file_flush(file)) {

		return(TRUE);
	}
#endif
error_handling:
	retry = os_file_handle_error(file, name);

	if (retry) {
		goto try_again;
	}

	ut_error;

	return(FALSE);
}
/***************************************************************************
Flushes the write buffers of a given file to the disk, using
FlushFileBuffers in Windows and fsync elsewhere. */
bool
os_file_flush(
/*==========*/
				/* out: TRUE if success */
	os_file_t	file)	/* in, own: handle to a file */
{
#ifdef __WIN__
	ut_a(file);

	return(FlushFileBuffers(file) ? TRUE : FALSE);
#else
	return((fsync(file) == 0) ? TRUE : FALSE);
#endif
}
#ifndef __WIN__
/***********************************************************************
Does a synchronous positioned read in Posix. If HAVE_PREAD is defined,
pread is called directly; otherwise the read is done as lseek + read
while holding the seek mutex chosen by the file handle. */
static
ssize_t
os_file_pread(
/*==========*/
				/* out: number of bytes read, -1 if error */
	os_file_t	file,	/* in: handle to a file */
	void*		buf,	/* in: buffer where to read */
	ulint		n,	/* in: number of bytes to read */
	ulint		offset)	/* in: offset from where to read */
{
#ifdef HAVE_PREAD
	return(pread(file, buf, n, (off_t) offset));
#else
	ssize_t	result;
	ulint	mutex_no;

	/* The seek and the read must not be interleaved with those of
	another thread using the same handle */

	mutex_no = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;

	os_mutex_enter(os_file_seek_mutexes[mutex_no]);

	result = lseek(file, (off_t) offset, 0);

	if (result >= 0) {
		/* The seek succeeded: do the actual read */

		result = read(file, buf, n);
	}

	os_mutex_exit(os_file_seek_mutexes[mutex_no]);

	return(result);
#endif
}
/***********************************************************************
Does a synchronous positioned write in Posix. If HAVE_PWRITE is defined,
pwrite is called directly; otherwise the write is done as lseek + write
while holding the seek mutex chosen by the file handle. */
static
ssize_t
os_file_pwrite(
/*===========*/
				/* out: number of bytes written, -1 if error */
	os_file_t	file,	/* in: handle to a file */
	void*		buf,	/* in: buffer from where to write */
	ulint		n,	/* in: number of bytes to write */
	ulint		offset)	/* in: offset where to write */
{
#ifdef HAVE_PWRITE
	return(pwrite(file, buf, n, (off_t) offset));
#else
	ssize_t	result;
	ulint	mutex_no;

	/* The seek and the write must not be interleaved with those of
	another thread using the same handle */

	mutex_no = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;

	os_mutex_enter(os_file_seek_mutexes[mutex_no]);

	result = lseek(file, (off_t) offset, 0);

	if (result >= 0) {
		/* The seek succeeded: do the actual write */

		result = write(file, buf, n);
	}

	os_mutex_exit(os_file_seek_mutexes[mutex_no]);

	return(result);
#endif
}
#endif
/***********************************************************************
Requests a synchronous positioned read operation. If the read fails,
os_file_handle_error decides whether the operation is retried; if it is
not, the database is stopped with ut_error. */
bool
os_file_read(
/*=========*/
				/* out: TRUE if request was
				successful, FALSE if fail */
	os_file_t	file,	/* in: handle to a file */
	void*		buf,	/* in: buffer where to read */
	ulint		offset,	/* in: least significant 32 bits of file
				offset where to read */
	ulint		offset_high, /* in: most significant 32 bits of
				offset */
	ulint		n)	/* in: number of bytes to read */
{
#ifdef __WIN__
	BOOL	ret;
	DWORD	len;
	DWORD	ret2;
	DWORD	err;
	DWORD	low;
	DWORD	high;
	bool	retry;
	ulint	i;

try_again:
	ut_ad(file);
	ut_ad(buf);
	ut_ad(n > 0);

	low = offset;
	high = offset_high;

	/* Protect the seek / read operation with a mutex */
	i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;

	os_mutex_enter(os_file_seek_mutexes[i]);

	ret2 = SetFilePointer(file, low, &high, FILE_BEGIN);

	if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) {
		err = GetLastError();

		os_mutex_exit(os_file_seek_mutexes[i]);

		goto error_handling;
	}

	ret = ReadFile(file, buf, n, &len, NULL);

	os_mutex_exit(os_file_seek_mutexes[i]);

	if (ret && len == n) {
		return(TRUE);
	}

	err = GetLastError();
#else
	bool	retry;
	ssize_t	ret;

#if (UNIV_WORD_SIZE == 8)
	offset = offset + (offset_high << 32);
#endif
try_again:
	/* No seek mutex is taken here: os_file_pread either uses pread,
	or takes the seek mutex of the file itself. Taking the same mutex
	also here would mean entering it twice, and it was not released
	on the error path before a retry. */

	ret = os_file_pread(file, buf, n, offset);

	if (ret >= 0 && (ulint) ret == n) {

		return(TRUE);
	}
#endif
error_handling:
	retry = os_file_handle_error(file, NULL);

	if (retry) {
		goto try_again;
	}

	ut_error;

	return(FALSE);
}
/***********************************************************************
Requests a synchronous positioned write operation. If the write fails,
os_file_handle_error decides whether the operation is retried; if it is
not, the database is stopped with ut_error. */
bool
os_file_write(
/*==========*/
				/* out: TRUE if request was
				successful, FALSE if fail */
	char*		name,	/* in: name of the file or path as a
				null-terminated string */
	os_file_t	file,	/* in: handle to a file */
	void*		buf,	/* in: buffer from which to write */
	ulint		offset,	/* in: least significant 32 bits of file
				offset where to write */
	ulint		offset_high, /* in: most significant 32 bits of
				offset */
	ulint		n)	/* in: number of bytes to write */
{
#ifdef __WIN__
	BOOL	ret;
	DWORD	len;
	DWORD	ret2;
	DWORD	err;
	DWORD	low;
	DWORD	high;
	bool	retry;
	ulint	i;

try_again:
	ut_ad(file);
	ut_ad(buf);
	ut_ad(n > 0);

	low = offset;
	high = offset_high;

	/* Protect the seek / write operation with a mutex */
	i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;

	os_mutex_enter(os_file_seek_mutexes[i]);

	ret2 = SetFilePointer(file, low, &high, FILE_BEGIN);

	if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) {
		err = GetLastError();

		os_mutex_exit(os_file_seek_mutexes[i]);

		goto error_handling;
	}

	ret = WriteFile(file, buf, n, &len, NULL);

	os_mutex_exit(os_file_seek_mutexes[i]);

	if (ret && len == n) {
		return(TRUE);
	}
#else
	bool	retry;
	ssize_t	ret;

#if (UNIV_WORD_SIZE == 8)
	offset = offset + (offset_high << 32);
#endif
try_again:
	/* Go through os_file_pwrite, like os_file_read goes through
	os_file_pread: it falls back to a mutex protected lseek + write
	if HAVE_PWRITE is not defined */

	ret = os_file_pwrite(file, buf, n, offset);

	if (ret >= 0 && (ulint) ret == n) {

		return(TRUE);
	}
#endif
error_handling:
	retry = os_file_handle_error(file, name);

	if (retry) {
		goto try_again;
	}

	ut_error;

	return(FALSE);
}
/********************************************************************
Returns a pointer to the slot at the given index of an aio array. The
index is asserted to be smaller than the number of slots. */
static
os_aio_slot_t*
os_aio_array_get_nth_slot(
/*======================*/
				/* out: pointer to slot */
	os_aio_array_t*	array,	/* in: aio array */
	ulint		index)	/* in: index of the slot */
{
	ut_a(index < array->n_slots);

	return(&(array->slots[index]));
}
/****************************************************************************
Creates an aio wait array: allocates the array with its mutex, not_full
event, slots and event handle array, and marks every slot free. */
static
os_aio_array_t*
os_aio_array_create(
/*================*/
				/* out, own: aio array */
	ulint	n,		/* in: maximum number of pending aio
				operations allowed; n must be divisible by
				n_segments */
	ulint	n_segments) 	/* in: number of segments in the aio array */
{
	os_aio_array_t*	arr;
	os_aio_slot_t*	cur;
	ulint		k;
#ifdef WIN_ASYNC_IO
	OVERLAPPED*	ovl;
#endif
	ut_a(n > 0);
	ut_a(n_segments > 0);
	ut_a(n % n_segments == 0);

	arr = ut_malloc(sizeof(os_aio_array_t));

	arr->mutex	= os_mutex_create(NULL);
	arr->not_full	= os_event_create(NULL);
	arr->n_slots	= n;
	arr->n_segments	= n_segments;
	arr->n_reserved	= 0;
	arr->slots	= ut_malloc(n * sizeof(os_aio_slot_t));
	arr->events	= ut_malloc(n * sizeof(os_event_t));

	k = 0;

	while (k < n) {
		cur = os_aio_array_get_nth_slot(arr, k);

		cur->pos = k;
		cur->reserved = FALSE;
#ifdef WIN_ASYNC_IO
		/* Each slot gets an event of its own; the handle is also
		copied to the events array of the aio array */
		ovl = &(cur->control);

		ovl->hEvent = os_event_create(NULL);

		arr->events[k] = ovl->hEvent;
#elif defined(POSIX_ASYNC_IO)
		/* NOTE(review): the slot struct visible in this file has
		no field named 'ready' - confirm whether this branch is
		still supposed to compile */
		cur->ready = os_event_create(NULL);
#endif
		k++;
	}

	return(arr);
}
/****************************************************************************
Initializes the asynchronous io system. Creates one aio array for non-ibuf
reads and one for non-ibuf writes, an aio array with just one segment for
the ibuf i/o, an aio array with just one segment for the log i/o, and a
synchronous aio array of the specified size. The combined number of segments
in the four first aio arrays is the parameter n_segments given to the
function: the ibuf and the log array take one segment each, and the rest
are divided between the write and the read array. The caller must create an
i/o handler thread for each segment in the four first arrays, but not for
the sync aio array. Also the file seek mutexes and the wait events of
simulated aio are created here. */
void
os_aio_init(
/*========*/
	ulint	n,		/* in: maximum number of pending aio operations
				allowed; n must be divisible by n_segments */
	ulint	n_segments,	/* in: combined number of segments in the four
				first aio arrays; must be >= 4 */
	ulint	n_slots_sync)	/* in: number of slots in the sync aio array */
{
	ulint	n_read_segs;
	ulint	n_write_segs;
	ulint	n_per_seg;
	ulint	i;

	ut_ad(n % n_segments == 0);
	ut_ad(n_segments >= 4);

	n_per_seg = n / n_segments;

	/* Two segments are taken by the ibuf and the log array */
	n_write_segs = (n_segments - 2) / 2;
	n_read_segs = n_segments - 2 - n_write_segs;

	os_aio_read_array = os_aio_array_create(n_read_segs * n_per_seg,
							n_read_segs);
	os_aio_write_array = os_aio_array_create(n_write_segs * n_per_seg,
							n_write_segs);
	os_aio_ibuf_array = os_aio_array_create(n_per_seg, 1);

	os_aio_log_array = os_aio_array_create(n_per_seg, 1);

	os_aio_sync_array = os_aio_array_create(n_slots_sync, 1);

	os_aio_n_segments = n_segments;

#if !(defined(WIN_ASYNC_IO) || defined(POSIX_ASYNC_IO))
	/* No native aio was compiled in */
	os_aio_use_native_aio = FALSE;
#endif
	os_aio_validate();

	for (i = 0; i < OS_FILE_N_SEEK_MUTEXES; i++) {
		os_file_seek_mutexes[i] = os_mutex_create(NULL);
	}

	/* One wait event for each global segment; size the allocation by
	the element type of the array */
	os_aio_segment_wait_events = ut_malloc(n_segments
				* sizeof(*os_aio_segment_wait_events));

	for (i = 0; i < n_segments; i++) {
		os_aio_segment_wait_events[i] = os_event_create(NULL);
	}
}
/**************************************************************************
Calculates the global segment number of a slot. The ibuf array is segment
0 and the log array segment 1; the segments of the read array follow from
2 onwards and after them come the segments of the write array. */
static
ulint
os_aio_get_segment_no_from_slot(
/*============================*/
				/* out: segment number (which is the number
				used by, for example, i/o-handler threads) */
	os_aio_array_t*	array,	/* in: aio wait array */
	os_aio_slot_t*	slot)	/* in: slot in this array */
{
	ulint	slots_per_seg;

	if (array == os_aio_ibuf_array) {

		return(0);
	}

	if (array == os_aio_log_array) {

		return(1);
	}

	if (array == os_aio_read_array) {
		slots_per_seg = os_aio_read_array->n_slots
					/ os_aio_read_array->n_segments;

		return(2 + slot->pos / slots_per_seg);
	}

	/* Only the write array is left */
	ut_a(array == os_aio_write_array);

	slots_per_seg = os_aio_write_array->n_slots
					/ os_aio_write_array->n_segments;

	return(os_aio_read_array->n_segments + 2
					+ slot->pos / slots_per_seg);
}
/**************************************************************************
Maps a global segment number to the aio array which contains the segment
and to the number of the segment within that array. */
static
ulint
os_aio_get_array_and_local_segment(
/*===============================*/
					/* out: local segment number within
					the aio array */
	os_aio_array_t** array,		/* out: aio wait array */
	ulint		 global_segment)/* in: global segment number */
{
	ulint	first_write_seg;

	ut_a(global_segment < os_aio_n_segments);

	if (global_segment == 0) {
		*array = os_aio_ibuf_array;

		return(0);
	}

	if (global_segment == 1) {
		*array = os_aio_log_array;

		return(0);
	}

	/* The read segments start at 2; the write segments follow them */
	first_write_seg = os_aio_read_array->n_segments + 2;

	if (global_segment < first_write_seg) {
		*array = os_aio_read_array;

		return(global_segment - 2);
	}

	*array = os_aio_write_array;

	return(global_segment - first_write_seg);
}
/***********************************************************************
Gets an integer value designating a specified aio array: 0 for the ibuf,
1 for the log, 2 for the read and 3 for the write array. This is used
to give numbers to signals in Posix aio. */
static
ulint
os_aio_get_array_no(
/*================*/
				/* out: number of the array */
	os_aio_array_t*	array)	/* in: aio array */
{
	if (array == os_aio_ibuf_array) {

		return(0);
	}

	if (array == os_aio_log_array) {

		return(1);
	}

	if (array == os_aio_read_array) {

		return(2);
	}

	if (array == os_aio_write_array) {

		return(3);
	}

	/* Not one of the numbered arrays */
	ut_a(0);

	return(0);
}
/***********************************************************************
Gets the aio array for its number: the inverse of os_aio_get_array_no. */
static
os_aio_array_t*
os_aio_get_array_from_no(
/*=====================*/
			/* out: aio array */
	ulint	n)	/* in: array number */
{
	switch (n) {
	case 0:
		return(os_aio_ibuf_array);
	case 1:
		return(os_aio_log_array);
	case 2:
		return(os_aio_read_array);
	case 3:
		return(os_aio_write_array);
	default:
		/* Not a valid array number */
		ut_a(0);

		return(NULL);
	}
}
/***********************************************************************
Requests for a slot in the aio array. If no slot is available, waits until
not_full-event becomes signaled. The first free slot is reserved and the
description of the request is stored to it; if a native aio is compiled
in, also the control block of the slot is prepared. */
static
os_aio_slot_t*
os_aio_array_reserve_slot(
/*======================*/
				/* out: pointer to slot */
	ulint		type,	/* in: OS_FILE_READ or OS_FILE_WRITE */
	os_aio_array_t*	array,	/* in: aio array */
	void*		message1,/* in: message to be passed along with
				the aio operation */
	void*		message2,/* in: message to be passed along with
				the aio operation */
	os_file_t	file,	/* in: file handle */
	char*		name,	/* in: name of the file or path as a
				null-terminated string */
	void*		buf,	/* in: buffer where to read or from which
				to write */
	ulint		offset,	/* in: least significant 32 bits of file
				offset */
	ulint		offset_high, /* in: most significant 32 bits of
				offset */
	ulint		len)	/* in: length of the block to read or write */
{
	os_aio_slot_t*	slot;
#ifdef WIN_ASYNC_IO
	OVERLAPPED*	control;
#elif defined(POSIX_ASYNC_IO)
	struct aiocb*	control;
#endif
	ulint		i;

loop:
	os_mutex_enter(array->mutex);

	if (array->n_reserved == array->n_slots) {
		/* All the slots are in use: wait for a free one and
		check again */

		os_mutex_exit(array->mutex);

		os_event_wait(array->not_full);

		goto loop;
	}

	/* We know that there is a free slot: the loop ends at the
	latest at the last slot of the array */

	for (i = 0;; i++) {
		slot = os_aio_array_get_nth_slot(array, i);

		if (slot->reserved == FALSE) {
			break;
		}
	}

	array->n_reserved++;

	if (array->n_reserved == array->n_slots) {
		/* The array became full: later requests have to wait */

		os_event_reset(array->not_full);
	}

	slot->reserved = TRUE;
	slot->message1 = message1;
	slot->message2 = message2;
	slot->file     = file;
	slot->name     = name;
	slot->len      = len;
	slot->type     = type;
	slot->buf      = buf;
	slot->offset   = offset;
	slot->offset_high = offset_high;
	slot->io_already_done = FALSE;

#ifdef WIN_ASYNC_IO
	control = &(slot->control);
	control->Offset = (DWORD)offset;
	control->OffsetHigh = (DWORD)offset_high;
	os_event_reset(control->hEvent);

#elif defined(POSIX_ASYNC_IO)

#if (UNIV_WORD_SIZE == 8)
	offset = offset + (offset_high << 32);
#else
	ut_a(offset_high == 0);
#endif
	control = &(slot->control);
	control->aio_fildes = file;
	control->aio_buf = buf;
	control->aio_nbytes = len;
	control->aio_offset = offset;
	control->aio_reqprio = 0;
	control->aio_sigevent.sigev_notify = SIGEV_SIGNAL;
	control->aio_sigevent.sigev_signo =
			SIGRTMAX + 1 + os_aio_get_array_no(array);
			/* TODO: How to choose the signal numbers? */
	control->aio_sigevent.sigev_value.sival_ptr = slot;
#endif
	os_mutex_exit(array->mutex);

	return(slot);
}
/***********************************************************************
Releases a reserved slot of an aio array. If the array was full, the
not_full event is set so that a waiting thread can reserve a slot. */
static
void
os_aio_array_free_slot(
/*===================*/
	os_aio_array_t*	array,	/* in: aio array */
	os_aio_slot_t*	slot)	/* in: pointer to slot */
{
	bool	was_full;

	ut_ad(array);
	ut_ad(slot);

	os_mutex_enter(array->mutex);

	ut_ad(slot->reserved);

	was_full = (array->n_reserved == array->n_slots);

	slot->reserved = FALSE;

	array->n_reserved--;

	if (was_full) {
		/* There is room in the array again */

		os_event_set(array->not_full);
	}

#ifdef WIN_ASYNC_IO
	os_event_reset(slot->control.hEvent);
#endif
	os_mutex_exit(array->mutex);
}
/**************************************************************************
Sets the wait event of a segment of simulated aio, if at least one slot of
the segment is reserved, that is, if the i/o-handler thread of the segment
has something to do. */
static
void
os_aio_simulated_wake_handler_thread(
/*=================================*/
	ulint	global_segment)	/* in: the number of the segment in the aio
				arrays */
{
	os_aio_array_t*	array;
	ulint		local_seg;
	ulint		seg_len;
	ulint		k;
	bool		found	= FALSE;

	ut_ad(!os_aio_use_native_aio);

	local_seg = os_aio_get_array_and_local_segment(&array,
							global_segment);

	seg_len = array->n_slots / array->n_segments;

	/* Scan the slots of this segment: they start at the slot
	local_seg * seg_len */

	os_mutex_enter(array->mutex);

	k = 0;

	while (!found && k < seg_len) {
		if (os_aio_array_get_nth_slot(array,
				k + local_seg * seg_len)->reserved) {
			/* Found an i/o request */

			found = TRUE;
		} else {
			k++;
		}
	}

	os_mutex_exit(array->mutex);

	if (found) {
		os_event_set(os_aio_segment_wait_events[global_segment]);
	}
}
/**************************************************************************
Goes through all the segments of simulated aio and wakes up the
i/o-handler thread of each segment that has something to do. Does nothing
if native aio is in use. */
void
os_aio_simulated_wake_handler_threads(void)
/*=======================================*/
{
	ulint	seg;

	if (!os_aio_use_native_aio) {
		seg = 0;

		while (seg < os_aio_n_segments) {
			os_aio_simulated_wake_handler_thread(seg);

			seg++;
		}
	}
}
/***********************************************************************
Requests an asynchronous i/o operation. A request with mode OS_AIO_SYNC is
done as an ordinary synchronous read or write, except when Windows native
aio is in use. Otherwise a slot is reserved in the aio array chosen by the
mode, and the request is either passed to the native aio of the OS or left
for an i/o-handler thread of simulated aio, which is woken up unless the
caller asked for a later wake-up. If queuing fails, the slot is freed and
os_file_handle_error decides whether the request is retried. */
bool
os_aio(
/*===*/
				/* out: TRUE if request was queued
				successfully, FALSE if fail */
	ulint		type,	/* in: OS_FILE_READ or OS_FILE_WRITE */
	ulint		mode,	/* in: OS_AIO_NORMAL, ..., possibly ORed
				to OS_AIO_SIMULATED_WAKE_LATER: the
				last flag advises this function not to wake
				i/o-handler threads, but the caller will
				do the waking explicitly later, in this
				way the caller can post several requests in
				a batch; NOTE that the batch must not be
				so big that it exhausts the slots in aio
				arrays! NOTE that a simulated batch
				may introduce hidden chances of deadlocks,
				because i/os are not actually handled until
				all have been posted: use with great
				caution! */
	char*		name,	/* in: name of the file or path as a
				null-terminated string */
	os_file_t	file,	/* in: handle to a file */
	void*		buf,	/* in: buffer where to read or from which
				to write */
	ulint		offset,	/* in: least significant 32 bits of file
				offset where to read or write */
	ulint		offset_high, /* in: most significant 32 bits of
				offset */
	ulint		n,	/* in: number of bytes to read or write */
	void*		message1,/* in: messages for the aio handler (these
				can be used to identify a completed aio
				operation); if mode is OS_AIO_SYNC, these
				are ignored */
	void*		message2)
{
	os_aio_array_t*	array	= NULL;
	os_aio_slot_t*	slot;
#ifdef WIN_ASYNC_IO
	BOOL		ret		= TRUE;
	DWORD		len		= n;
	void*		dummy_mess1;
	void*		dummy_mess2;
#endif
	ulint		err		= 0;
	bool		retry;
	ulint		wake_later;

	ut_ad(file);
	ut_ad(buf);
	ut_ad(n > 0);
	ut_ad(n % OS_FILE_LOG_BLOCK_SIZE == 0);
	ut_ad((ulint)buf % OS_FILE_LOG_BLOCK_SIZE == 0);
	ut_ad(offset % OS_FILE_LOG_BLOCK_SIZE == 0);
	ut_ad(os_aio_validate());

	/* Separate the wake later flag from the actual mode */
	wake_later = mode & OS_AIO_SIMULATED_WAKE_LATER;
	mode = mode & (~OS_AIO_SIMULATED_WAKE_LATER);

	if (mode == OS_AIO_SYNC
#ifdef WIN_ASYNC_IO
				&& !os_aio_use_native_aio
#endif
	) {
		/* This is actually an ordinary synchronous read or write:
		no need to use an i/o-handler thread. NOTE that if we use
		Windows async i/o, Windows does not allow us to use
		ordinary synchronous os_file_read etc. on the same file,
		therefore we have built a special mechanism for synchronous
		wait in the Windows case. */

		if (type == OS_FILE_READ) {
			return(os_file_read(file, buf, offset, offset_high, n));
		}

		ut_a(type == OS_FILE_WRITE);

		return(os_file_write(name, file, buf, offset, offset_high, n));
	}

try_again:
	/* Choose the aio array according to the mode */

	if (mode == OS_AIO_NORMAL) {
		if (type == OS_FILE_READ) {
			array = os_aio_read_array;
		} else {
			array = os_aio_write_array;
		}
	} else if (mode == OS_AIO_IBUF) {
		ut_ad(type == OS_FILE_READ);

		array = os_aio_ibuf_array;
	} else if (mode == OS_AIO_LOG) {

		array = os_aio_log_array;
	} else if (mode == OS_AIO_SYNC) {
		array = os_aio_sync_array;
	} else {
		ut_error;
	}

	slot = os_aio_array_reserve_slot(type, array, message1, message2, file,
					name, buf, offset, offset_high, n);
	if (type == OS_FILE_READ) {
		if (os_aio_use_native_aio) {
#ifdef WIN_ASYNC_IO
			ret = ReadFile(file, buf, (DWORD)n, &len,
							&(slot->control));
#elif defined(POSIX_ASYNC_IO)
			err = (ulint) aio_read(&(slot->control));
#endif
		} else {
			if (!wake_later) {
				os_aio_simulated_wake_handler_thread(
				 os_aio_get_segment_no_from_slot(array, slot));
			}
		}
	} else if (type == OS_FILE_WRITE) {
		if (os_aio_use_native_aio) {
#ifdef WIN_ASYNC_IO
			ret = WriteFile(file, buf, (DWORD)n, &len,
							&(slot->control));
#elif defined(POSIX_ASYNC_IO)
			err = (ulint) aio_write(&(slot->control));
#endif
		} else {
			if (!wake_later) {
				os_aio_simulated_wake_handler_thread(
				 os_aio_get_segment_no_from_slot(array, slot));
			}
		}
	} else {
		ut_error;
	}

#ifdef WIN_ASYNC_IO
	if ((ret && len == n)
	    || (!ret && GetLastError() == ERROR_IO_PENDING)) {

		/* aio was queued successfully! */

		if (mode == OS_AIO_SYNC) {
			/* We want a synchronous i/o operation on a file
			where we also use async i/o: in Windows we must
			use the same wait mechanism as for async i/o */

			return(os_aio_windows_handle(ULINT_UNDEFINED,
						slot->pos,
						&dummy_mess1, &dummy_mess2));
		}

		return(TRUE);
	}
#else
	if (err == 0) {
		/* aio was queued successfully! */

		return(TRUE);
	}
#endif
	/* Queuing failed: give the slot back before asking what to do */

	os_aio_array_free_slot(array, slot);

	retry = os_file_handle_error(file, name);

	if (retry) {

		goto try_again;
	}

	ut_error;

	return(FALSE);
}
#ifdef WIN_ASYNC_IO
/**************************************************************************
Used only with Windows asynchronous i/o. Blocks until an aio request has
completed and reports how it went. The pending requests of an aio array are
divided into segments; the caller names the segment (or, for sync aio, the
single slot) it wants to wait for. NOTE: the aio slot is freed here, so no
other thread is allowed to do the freeing! */

bool
os_aio_windows_handle(
/*==================*/
				/* out: TRUE if the aio operation succeeded */
	ulint	segment,	/* in: the number of the segment in the aio
				arrays to wait for; segment 0 is the ibuf
				i/o thread, segment 1 the log i/o thread,
				then follow the non-ibuf read threads, and as
				the last are the non-ibuf write threads; if
				this is ULINT_UNDEFINED, then it means that
				sync aio is used, and this parameter is
				ignored */
	ulint	pos,		/* in: used only in sync aio: wait for the
				aio slot at this position */
	void**	message1,	/* out: the messages passed with the aio
				request; these are filled in also when the
				aio operation failed, so they can be used
				to restart the operation, for example */
	void**	message2)
{
	os_aio_array_t*	array;
	os_aio_slot_t*	done_slot;
	ulint		slots_per_segment;
	ulint		signalled;
	bool		success;
	ulint		err;
	BOOL		ok;
	DWORD		n_bytes;

	if (segment != ULINT_UNDEFINED) {
		segment = os_aio_get_array_and_local_segment(&array, segment);
	} else {
		/* Sync aio: there is just one array and the caller names
		the slot directly through pos */
		array = os_aio_sync_array;
		segment = 0;
	}

	/* NOTE! Only constant fields of the aio array are read below, so
	the protecting mutex is not needed yet */

	ut_ad(os_aio_validate());
	ut_ad(segment < array->n_segments);

	slots_per_segment = array->n_slots / array->n_segments;

	if (array != os_aio_sync_array) {
		signalled = os_event_wait_multiple(slots_per_segment,
				(array->events) + segment * slots_per_segment);
	} else {
		ut_ad(pos < array->n_slots);
		os_event_wait(array->events[pos]);
		signalled = pos;
	}

	os_mutex_enter(array->mutex);

	done_slot = os_aio_array_get_nth_slot(array,
				signalled + segment * slots_per_segment);

	ut_a(done_slot->reserved);

	ok = GetOverlappedResult(done_slot->file, &(done_slot->control),
							&n_bytes, TRUE);

	/* The messages are handed back whether or not the i/o succeeded */
	*message1 = done_slot->message1;
	*message2 = done_slot->message2;

	if (!ok || n_bytes != done_slot->len) {
		/* Failure or a short transfer: ut_error is raised here */
		err = GetLastError();
		ut_error;

		success = FALSE;
	} else {
		success = TRUE;
	}

	os_mutex_exit(array->mutex);

	os_aio_array_free_slot(array, done_slot);

	return(success);
}
#endif
#ifdef POSIX_ASYNC_IO
/**************************************************************************
This function is only used in Posix asynchronous i/o. Waits for an aio
operation to complete: blocks in sigwaitinfo() on the signal that belongs
to the given array, takes the completed slot from the signal payload, hands
back its messages and frees the slot. */

bool
os_aio_posix_handle(
/*================*/
				/* out: TRUE if the aio operation succeeded;
				FALSE if sigwaitinfo() did not return the
				expected signal (ut_a fires first) */
	ulint	array_no,	/* in: array number 0 - 3 */
	void**	message1,	/* out: the messages passed with the aio
				request; note that also in the case where
				the aio operation failed, these output
				parameters are valid and can be used to
				restart the operation, for example */
	void**	message2)
{
	os_aio_array_t*	array;
	os_aio_slot_t*	slot;
	siginfo_t	info;
	sigset_t	sigset;
	int		ret;

	sigemptyset(&sigset);
	sigaddset(&sigset, SIGRTMAX + 1 + array_no);

	ret = sigwaitinfo(&sigset, &info);

	if (ret != SIGRTMAX + 1 + array_no) {

		ut_a(0);

		return(FALSE);
	}

	array = os_aio_get_array_from_no(array_no);

	os_mutex_enter(array->mutex);

	/* The slot pointer travels in the payload of the signal that
	sigwaitinfo() stored in info. The previous code read it from an
	undeclared object named siginfo, which cannot compile. */
	slot = info.si_value.sival_ptr;

	ut_a(slot->reserved);

	*message1 = slot->message1;
	*message2 = slot->message2;

	os_mutex_exit(array->mutex);

	os_aio_array_free_slot(array, slot);

	return(TRUE);
}
#endif
/**************************************************************************
Does simulated aio. This function should be called by an i/o-handler
thread. It picks a reserved slot from the caller's segment, merges up to
OS_AIO_MERGE_N_CONSECUTIVE requests that are adjacent in the same file into
one synchronous read or write, and returns the messages of one slot per
call. Slots whose i/o was already performed as part of a merged request are
returned by subsequent calls without doing any new i/o. If the segment has
no requests, the call waits on the segment's event and then rescans. */

bool
os_aio_simulated_handle(
/*====================*/
				/* out: TRUE if the aio operation succeeded */
	ulint	global_segment,	/* in: the number of the segment in the aio
				arrays to wait for; segment 0 is the ibuf
				i/o thread, segment 1 the log i/o thread,
				then follow the non-ibuf read threads, and as
				the last are the non-ibuf write threads */
	void**	message1,	/* out: the messages passed with the aio
				request; note that also in the case where
				the aio operation failed, these output
				parameters are valid and can be used to
				restart the operation, for example */
	void**	message2)
{
	os_aio_array_t*	array;
	ulint		segment;
	os_aio_slot_t*	slot;
	os_aio_slot_t*	slot2;
	os_aio_slot_t*	consecutive_ios[OS_AIO_MERGE_N_CONSECUTIVE];
	ulint		n_consecutive;
	ulint		total_len;
	ulint		offs;
	byte*		combined_buf;
	bool		ret;
	ulint		n;
	ulint		i;

	segment = os_aio_get_array_and_local_segment(&array, global_segment);

restart:
	/* NOTE! We only access constant fields in os_aio_array. Therefore
	we do not have to acquire the protecting mutex yet */

	ut_ad(os_aio_validate());
	ut_ad(segment < array->n_segments);

	n = array->n_slots / array->n_segments;

	/* Look through n slots after the segment * n'th slot */

	os_mutex_enter(array->mutex);

	/* Check if there is a slot for which the i/o has already been
	done */

	for (i = 0; i < n; i++) {
		slot = os_aio_array_get_nth_slot(array, i + segment * n);

		if (slot->reserved && slot->io_already_done) {

			/* A slot is flagged io_already_done only after the
			merged i/o passed ut_a(ret) below, so the operation
			is known to have succeeded. Without this assignment
			an uninitialized ret was returned on this path. */
			ret = TRUE;

			goto slot_io_done;
		}
	}

	n_consecutive = 0;

	for (i = 0; i < n; i++) {
		slot = os_aio_array_get_nth_slot(array, i + segment * n);

		if (slot->reserved) {
			/* Found an i/o request */
			consecutive_ios[n_consecutive] = slot;
			n_consecutive++;

			break;
		}
	}

	/* Check if there are several consecutive blocks to read or write */

consecutive_loop:
	for (i = 0; i < n; i++) {
		slot2 = os_aio_array_get_nth_slot(array, i + segment * n);

		if (slot2->reserved && slot2 != slot
		    && slot2->offset == slot->offset + slot->len
		    && slot->offset + slot->len > slot->offset /* check that
						sum does not wrap over */
		    && slot2->offset_high == slot->offset_high
		    && slot2->type == slot->type
		    && slot2->file == slot->file) {

			/* Found a consecutive i/o request */

			consecutive_ios[n_consecutive] = slot2;
			n_consecutive++;

			/* Continue the chain from the request just added */
			slot = slot2;

			if (n_consecutive < OS_AIO_MERGE_N_CONSECUTIVE) {

				goto consecutive_loop;
			} else {
				break;
			}
		}
	}

	if (n_consecutive == 0) {

		/* No i/o requested at the moment */

		goto wait_for_io;
	}

	/* We have now collected n_consecutive i/o requests in the array;
	allocate a single buffer which can hold all data, and perform the
	i/o */

	total_len = 0;
	slot = consecutive_ios[0];

	for (i = 0; i < n_consecutive; i++) {
		total_len += consecutive_ios[i]->len;
	}

	if (n_consecutive == 1) {
		/* We can use the buffer of the i/o request */
		combined_buf = slot->buf;
	} else {
		combined_buf = ut_malloc(total_len);

		ut_a(combined_buf);
	}

	/* We release the array mutex for the time of the i/o: NOTE that
	this assumes that there is just one i/o-handler thread serving
	a single segment of slots! */

	os_mutex_exit(array->mutex);

	if (slot->type == OS_FILE_WRITE && n_consecutive > 1) {
		/* Copy the buffers to the combined buffer */
		offs = 0;

		for (i = 0; i < n_consecutive; i++) {

			ut_memcpy(combined_buf + offs, consecutive_ios[i]->buf,
						consecutive_ios[i]->len);
			offs += consecutive_ios[i]->len;
		}
	}

	/* Do the i/o with ordinary, synchronous i/o functions: */
	if (slot->type == OS_FILE_WRITE) {
		ret = os_file_write(slot->name, slot->file, combined_buf,
				slot->offset, slot->offset_high, total_len);
	} else {
		ret = os_file_read(slot->file, combined_buf,
				slot->offset, slot->offset_high, total_len);
	}

	ut_a(ret);

	if (slot->type == OS_FILE_READ && n_consecutive > 1) {
		/* Copy the combined buffer to individual buffers */
		offs = 0;

		for (i = 0; i < n_consecutive; i++) {

			ut_memcpy(consecutive_ios[i]->buf, combined_buf + offs,
						consecutive_ios[i]->len);
			offs += consecutive_ios[i]->len;
		}
	}

	if (n_consecutive > 1) {
		ut_free(combined_buf);
	}

	os_mutex_enter(array->mutex);

	/* Mark the i/os done in slots */

	for (i = 0; i < n_consecutive; i++) {
		consecutive_ios[i]->io_already_done = TRUE;
	}

	/* We return the messages for the first slot now, and if there were
	several slots, the messages will be returned with subsequent calls
	of this function */

slot_io_done:

	ut_a(slot->reserved);

	*message1 = slot->message1;
	*message2 = slot->message2;

	os_mutex_exit(array->mutex);

	os_aio_array_free_slot(array, slot);

	return(ret);

wait_for_io:
	/* We wait here until there again can be i/os in the segment
	of this thread */

	os_event_reset(os_aio_segment_wait_events[global_segment]);

	os_mutex_exit(array->mutex);

	os_event_wait(os_aio_segment_wait_events[global_segment]);

	goto restart;
}
/**************************************************************************
Checks one aio array for internal consistency: it must have slots and
segments, every reserved slot must carry a nonzero length, and the stored
reserved count must match the number of reserved slots actually found.
Any violation trips ut_a. */
static
bool
os_aio_array_validate(
/*==================*/
				/* out: TRUE if ok */
	os_aio_array_t*	array)	/* in: aio wait array */
{
	ulint	n_in_use;
	ulint	idx;

	ut_a(array);

	os_mutex_enter(array->mutex);

	ut_a(array->n_slots > 0);
	ut_a(array->n_segments > 0);

	/* Recount the reserved slots under the mutex */
	n_in_use = 0;
	idx = 0;

	while (idx < array->n_slots) {
		os_aio_slot_t*	cur = os_aio_array_get_nth_slot(array, idx);

		if (cur->reserved) {
			ut_a(cur->len > 0);
			n_in_use++;
		}

		idx++;
	}

	ut_a(n_in_use == array->n_reserved);

	os_mutex_exit(array->mutex);

	return(TRUE);
}
/**************************************************************************
Validates the consistency the aio system. */
bool
os_aio_validate(void)
/*=================*/
/* out: TRUE if ok */
{
os_aio_array_validate(os_aio_read_array);
os_aio_array_validate(os_aio_write_array);
os_aio_array_validate(os_aio_ibuf_array);
os_aio_array_validate(os_aio_log_array);
os_aio_array_validate(os_aio_sync_array);
return(TRUE);
}
/**************************************************************************
Prints info of the aio arrays to stdout: for each of the read, write, ibuf,
log and sync arrays, in that order, the messages of every reserved slot and
the total number of reserved slots. The same consistency assertions as in
array validation are applied while printing. */

void
os_aio_print(void)
/*==============*/
{
	os_aio_array_t*	arrays[5];
	os_aio_array_t*	array;
	os_aio_slot_t*	slot;
	ulint		n_reserved;
	ulint		k;
	ulint		i;

	arrays[0] = os_aio_read_array;
	arrays[1] = os_aio_write_array;
	arrays[2] = os_aio_ibuf_array;
	arrays[3] = os_aio_log_array;
	arrays[4] = os_aio_sync_array;

	for (k = 0; k < sizeof(arrays) / sizeof(arrays[0]); k++) {
		array = arrays[k];

		ut_a(array);

		printf("INFO OF AN AIO ARRAY\n");

		os_mutex_enter(array->mutex);

		ut_a(array->n_slots > 0);
		ut_a(array->n_segments > 0);

		n_reserved = 0;

		for (i = 0; i < array->n_slots; i++) {
			slot = os_aio_array_get_nth_slot(array, i);

			if (slot->reserved) {
				n_reserved++;

				/* The messages are pointers: print them with
				%p. The former %lx did not match the argument
				type. */
				printf("Reserved slot, messages %p %p\n",
						(void*) slot->message1,
						(void*) slot->message2);
				ut_a(slot->len > 0);
			}
		}

		ut_a(array->n_reserved == n_reserved);

		printf("Total of %lu reserved aio slots\n",
						(unsigned long) n_reserved);

		os_mutex_exit(array->mutex);
	}
}
/**************************************************************************
Tells whether every slot in the aio system is free, that is, whether there
are no pending i/o operations. The reserved counts of the read, write,
ibuf, log and sync arrays are summed, each read under its own mutex. */

bool
os_aio_all_slots_free(void)
/*=======================*/
				/* out: TRUE if all free */
{
	os_aio_array_t*	to_check[5];
	ulint		pending;
	ulint		k;

	to_check[0] = os_aio_read_array;
	to_check[1] = os_aio_write_array;
	to_check[2] = os_aio_ibuf_array;
	to_check[3] = os_aio_log_array;
	to_check[4] = os_aio_sync_array;

	pending = 0;

	for (k = 0; k < sizeof(to_check) / sizeof(to_check[0]); k++) {

		os_mutex_enter(to_check[k]->mutex);

		pending += to_check[k]->n_reserved;

		os_mutex_exit(to_check[k]->mutex);
	}

	if (pending != 0) {

		return(FALSE);
	}

	return(TRUE);
}
......@@ -863,123 +863,6 @@ srv_release_max_if_no_queries(void)
mutex_exit(&kernel_mutex);
}
#ifdef notdefined
/***********************************************************************
If no SRV_COM thread is active, and the SRV_RECOVERY meter is above its
high-water mark 2 while fewer than one recovery thread is active, releases
recovery threads up to that limit of one. Runs under the kernel mutex. */
static
void
srv_release_one_if_no_queries(void)
/*===============================*/
{
	ulint	util_type;
	ulint	max_active;

	mutex_enter(&kernel_mutex);

	if (srv_n_threads_active[SRV_COM] == 0) {

		util_type = SRV_RECOVERY;
		max_active = 1;

		if ((srv_n_threads_active[util_type] < max_active)
		    && (srv_meter[util_type]
				> srv_meter_high_water2[util_type])) {

			srv_release_threads(util_type,
				max_active - srv_n_threads_active[util_type]);

			printf("Releasing one background\n");
		}
	}

	mutex_exit(&kernel_mutex);
}
/***********************************************************************
Subtracts n from the utility meter of the given type, clamping at zero,
and then suspends the calling thread if more threads of that type are
active than srv_max_n_utilities() allows. The caller must be a utility
thread of the type given. */
static
void
srv_decrement_meter(
/*================*/
	ulint	type,	/* in: utility type */
	ulint	n)	/* in: value to subtract from meter */
{
	ulint		allowed;
	os_event_t	wake_event;

	mutex_enter(&kernel_mutex);

	/* The meter is unsigned: clamp instead of wrapping below zero */
	srv_meter[type] = (srv_meter[type] < n) ? 0 : srv_meter[type] - n;

	allowed = srv_max_n_utilities(type);

	if (allowed >= srv_n_threads_active[type]) {

		mutex_exit(&kernel_mutex);

		return;
	}

	/* Too many active threads of this type: park this one. The event
	is obtained under the mutex and waited on after releasing it. */
	wake_event = srv_suspend_thread();

	mutex_exit(&kernel_mutex);

	os_event_wait(wake_event);
}
#endif
/*************************************************************************
Implements the server console: reserves a SRV_CONSOLE slot, waits until the
server is operational, and then reads one whitespace-delimited command at a
time from stdin. A command starting with 'c' makes a checkpoint; one
starting with 'd' sets the disk wait simulation percentage from the digits
that follow; anything else is reported as unsupported. The loop ends when
stdin can no longer be read. */

ulint
srv_console(
/*========*/
			/* out: return code, not used */
	void*	arg)	/* in: argument, not used */
{
	char	command[256];

	UT_NOT_USED(arg);

	mutex_enter(&kernel_mutex);
	srv_table_reserve_slot(SRV_CONSOLE);
	mutex_exit(&kernel_mutex);

	os_event_wait(srv_sys->operational);

	for (;;) {
		/* Bound the read to the buffer size (255 chars + NUL); an
		unbounded %s would overflow command on long input. Stop on
		EOF or read error instead of looping forever over an
		uninitialized buffer. */
		if (scanf("%255s", command) != 1) {

			break;
		}

		srv_inc_thread_count(SRV_CONSOLE);

		if (command[0] == 'c') {
			printf("Making checkpoint\n");

			log_make_checkpoint_at(ut_dulint_max, TRUE);

			printf("Checkpoint completed\n");

		} else if (command[0] == 'd') {
			srv_sim_disk_wait_pct = atoi(command + 1);

			printf(
			"Starting disk access simulation with pct %lu\n",
			srv_sim_disk_wait_pct);
		} else {
			printf("\nNot supported!\n");
		}

		srv_dec_thread_count(SRV_CONSOLE);
	}

	return(0);
}
/*************************************************************************
Creates the first communication endpoint for the server. This
first call also initializes the com0com.* module. */
......@@ -1008,69 +891,6 @@ srv_communication_init(
ut_a(ret == 0);
}
#ifdef notdefined
/*************************************************************************
Implements the recovery utility. */
static
ulint
srv_recovery_thread(
/*================*/
/* out: return code, not used */
void* arg) /* in: not used */
{
ulint slot_no;
os_event_t event;
UT_NOT_USED(arg);
slot_no = srv_table_reserve_slot(SRV_RECOVERY);
os_event_wait(srv_sys->operational);
for (;;) {
/* Finish a possible recovery */
srv_inc_thread_count(SRV_RECOVERY);
/* recv_recovery_from_checkpoint_finish(); */
srv_dec_thread_count(SRV_RECOVERY);
mutex_enter(&kernel_mutex);
event = srv_suspend_thread();
mutex_exit(&kernel_mutex);
/* Wait for somebody to release this thread; (currently, this
should never be released) */
os_event_wait(event);
}
return(0);
}
/*************************************************************************
Implements the purge utility. */
ulint
srv_purge_thread(
/*=============*/
/* out: return code, not used */
void* arg) /* in: not used */
{
UT_NOT_USED(arg);
os_event_wait(srv_sys->operational);
for (;;) {
trx_purge();
}
return(0);
}
#endif /* notdefined */
/*************************************************************************
Creates the utility threads. */
......@@ -1100,58 +920,6 @@ srv_create_utility_threads(void)
ut_a(thread); */
}
#ifdef notdefined
/*************************************************************************
Implements the communication threads. */
static
ulint
srv_com_thread(
/*===========*/
/* out: return code; not used */
void* arg) /* in: not used */
{
byte* msg_buf;
byte* addr_buf;
ulint msg_len;
ulint addr_len;
ulint ret;
UT_NOT_USED(arg);
srv_table_reserve_slot(SRV_COM);
os_event_wait(srv_sys->operational);
msg_buf = mem_alloc(com_endpoint_get_max_size(srv_sys->endpoint));
addr_buf = mem_alloc(COM_MAX_ADDR_LEN);
for (;;) {
ret = com_recvfrom(srv_sys->endpoint, msg_buf,
com_endpoint_get_max_size(srv_sys->endpoint),
&msg_len, (char*)addr_buf, COM_MAX_ADDR_LEN,
&addr_len);
ut_a(ret == 0);
srv_inc_thread_count(SRV_COM);
sess_process_cli_msg(msg_buf, msg_len, addr_buf, addr_len);
/* srv_increment_meter(SRV_RECOVERY, 1); */
srv_dec_thread_count(SRV_COM);
/* Release one utility thread for each utility if
high water mark 2 is exceeded and there are no
active queries. This is done to utilize possible
quiet time in the server. */
srv_release_one_if_no_queries();
}
return(0);
}
#endif
/*************************************************************************
Creates the communication threads. */
......@@ -1171,53 +939,6 @@ srv_create_com_threads(void)
}
}
#ifdef notdefined
/*************************************************************************
Implements the worker threads. */
static
ulint
srv_worker_thread(
/*==============*/
/* out: return code, not used */
void* arg) /* in: not used */
{
os_event_t event;
UT_NOT_USED(arg);
srv_table_reserve_slot(SRV_WORKER);
os_event_wait(srv_sys->operational);
for (;;) {
mutex_enter(&kernel_mutex);
event = srv_suspend_thread();
mutex_exit(&kernel_mutex);
/* Wait for somebody to release this thread */
os_event_wait(event);
srv_inc_thread_count(SRV_WORKER);
/* Check in the server task queue if there is work for this
thread, and do the work */
srv_que_task_queue_check();
srv_dec_thread_count(SRV_WORKER);
/* Release one utility thread for each utility if
high water mark 2 is exceeded and there are no
active queries. This is done to utilize possible
quiet time in the server. */
srv_release_one_if_no_queries();
}
return(0);
}
#endif
/*************************************************************************
Creates the worker threads. */
......@@ -1238,404 +959,6 @@ srv_create_worker_threads(void)
}
}
#ifdef notdefined
/*************************************************************************
Reads a keyword and a value from a file. The tokens are whitespace
delimited and read in this order: the keyword (if one is expected), a
string value (if str_buf is given), a numerical value (if num_val is
given). A numerical value may have at most 9 characters and must be below
1000000000. */

ulint
srv_read_init_val(
/*==============*/
				/* out: DB_SUCCESS or error code */
	FILE*	initfile,	/* in: file pointer */
	char*	keyword,	/* in: keyword before value(s), or NULL if
				no keyword read */
	char*	str_buf,	/* in/out: buffer for a string value to read,
				buffer size must be 10000 bytes, if NULL
				then not read */
	ulint*	num_val,	/* out: numerical value to read, if NULL
				then not read */
	ibool	print_not_err)	/* in: if TRUE, then we will not print
				error messages to console */
{
	int		n_read;
	char		scan_buf[10000];
	const char*	kw;

	/* Passing a NULL pointer for %s is undefined behaviour; print a
	placeholder when the caller supplied no keyword */
	kw = keyword ? keyword : "(null)";

	if (keyword == NULL) {

		goto skip_keyword;
	}

	/* fscanf returns an int which may be EOF (negative): keep it in an
	int and accept only exactly one converted item */
	n_read = fscanf(initfile, "%9999s", scan_buf);

	if (n_read != 1 || 0 != ut_strcmp(scan_buf, keyword)) {
		if (print_not_err) {

			return(DB_ERROR);
		}

		printf("Error in InnoDB booting: keyword %s not found\n",
								keyword);
		printf("from the initfile!\n");

		return(DB_ERROR);
	}

skip_keyword:
	if (num_val == NULL && str_buf == NULL) {

		return(DB_SUCCESS);
	}

	n_read = fscanf(initfile, "%9999s", scan_buf);

	if (n_read != 1) {
		if (print_not_err) {

			return(DB_ERROR);
		}

		printf(
	"Error in InnoDB booting: could not read first value after %s\n",
								kw);
		printf("from the initfile!\n");

		return(DB_ERROR);
	}

	if (str_buf) {
		/* Copy just the token and its terminating NUL; the rest of
		scan_buf is uninitialized */
		ut_memcpy(str_buf, scan_buf, ut_strlen(scan_buf) + 1);

		printf("init keyword %s value %s read\n", kw, str_buf);

		if (!num_val) {

			return(DB_SUCCESS);
		}

		n_read = fscanf(initfile, "%9999s", scan_buf);

		if (n_read != 1) {
			if (print_not_err) {

				return(DB_ERROR);
			}

			printf(
	"Error in InnoDB booting: could not read second value after %s\n",
								kw);
			printf("from the initfile!\n");

			return(DB_ERROR);
		}
	}

	if (ut_strlen(scan_buf) > 9) {
		if (print_not_err) {

			return(DB_ERROR);
		}

		printf(
	"Error in InnoDB booting: numerical value too big after %s\n",
								kw);
		printf("in the initfile!\n");

		return(DB_ERROR);
	}

	*num_val = (ulint)atoi(scan_buf);

	/* With at most 9 characters accepted above, this can only fire
	for a negative number, which the cast turns into a huge value */
	if (*num_val >= 1000000000) {
		if (print_not_err) {

			return(DB_ERROR);
		}

		printf(
	"Error in InnoDB booting: numerical value too big after %s\n",
								kw);
		printf("in the initfile!\n");

		return(DB_ERROR);
	}

	printf("init keyword %s value %lu read\n", kw,
						(unsigned long) *num_val);

	return(DB_SUCCESS);
}
/*************************************************************************
Reads keywords and values from an initfile, in a fixed order, and stores
them in the srv_* configuration globals. Sizes given in MB or kB are
converted to pages using UNIV_PAGE_SIZE, except the additional memory pool
size, which is converted to bytes. The first mandatory entry that cannot be
read aborts the parse and its error code is returned. The trailing
SRV_RECOVER_FROM_BACKUP entry is optional. */

ulint
srv_read_initfile(
/*==============*/
				/* out: DB_SUCCESS or error code */
	FILE*	initfile)	/* in: file pointer */
{
	char	str_buf[10000];
	ulint	n;
	ulint	i;
	ulint	ulint_val;
	ulint	val1;
	ulint	val2;
	ulint	err;
	ulint	err2;

	err = srv_read_init_val(initfile, "INNOBASE_DATA_HOME_DIR",
							str_buf, NULL, FALSE);
	if (err != DB_SUCCESS) return(err);

	srv_data_home = ut_malloc(ut_strlen(str_buf) + 1);
	ut_memcpy(srv_data_home, str_buf, ut_strlen(str_buf) + 1);

	err = srv_read_init_val(initfile,"TABLESPACE_NUMBER_OF_DATA_FILES",
							NULL, &n, FALSE);
	if (err != DB_SUCCESS) return(err);

	srv_n_data_files = n;

	srv_data_file_names = ut_malloc(n * sizeof(char*));
	srv_data_file_sizes = ut_malloc(n * sizeof(ulint));

	for (i = 0; i < n; i++) {
		err = srv_read_init_val(initfile,
					"DATA_FILE_PATH_AND_SIZE_MB",
					str_buf, &ulint_val, FALSE);
		if (err != DB_SUCCESS) return(err);

		srv_data_file_names[i] = ut_malloc(ut_strlen(str_buf) + 1);
		ut_memcpy(srv_data_file_names[i], str_buf,
						ut_strlen(str_buf) + 1);

		/* Megabytes to pages */
		srv_data_file_sizes[i] = ulint_val
					* ((1024 * 1024) / UNIV_PAGE_SIZE);
	}

	err = srv_read_init_val(initfile,
				"NUMBER_OF_MIRRORED_LOG_GROUPS", NULL,
				&srv_n_log_groups, FALSE);
	if (err != DB_SUCCESS) return(err);

	err = srv_read_init_val(initfile,
				"NUMBER_OF_LOG_FILES_IN_GROUP", NULL,
				&srv_n_log_files, FALSE);
	if (err != DB_SUCCESS) return(err);

	err = srv_read_init_val(initfile, "LOG_FILE_SIZE_KB", NULL,
						&srv_log_file_size, FALSE);
	if (err != DB_SUCCESS) return(err);

	/* Kilobytes to pages */
	srv_log_file_size = srv_log_file_size / (UNIV_PAGE_SIZE / 1024);

	/* One home directory is stored per log group, so the array must be
	sized by the number of groups. It was previously sized by the number
	of log files, which overflows when there are more groups than files
	per group. */
	srv_log_group_home_dirs = ut_malloc(srv_n_log_groups * sizeof(char*));

	for (i = 0; i < srv_n_log_groups; i++) {

		err = srv_read_init_val(initfile,
					"INNOBASE_LOG_GROUP_HOME_DIR",
					str_buf, NULL, FALSE);
		if (err != DB_SUCCESS) return(err);

		srv_log_group_home_dirs[i] = ut_malloc(ut_strlen(str_buf) + 1);
		ut_memcpy(srv_log_group_home_dirs[i], str_buf,
						ut_strlen(str_buf) + 1);
	}

	err = srv_read_init_val(initfile, "INNOBASE_LOG_ARCH_DIR",
							str_buf, NULL, FALSE);
	if (err != DB_SUCCESS) return(err);

	srv_arch_dir = ut_malloc(ut_strlen(str_buf) + 1);
	ut_memcpy(srv_arch_dir, str_buf, ut_strlen(str_buf) + 1);

	err = srv_read_init_val(initfile, "LOG_ARCHIVE_ON(1/0)", NULL,
						&srv_log_archive_on, FALSE);
	if (err != DB_SUCCESS) return(err);

	err = srv_read_init_val(initfile, "LOG_BUFFER_SIZE_KB", NULL,
						&srv_log_buffer_size, FALSE);
	if (err != DB_SUCCESS) return(err);

	/* Kilobytes to pages */
	srv_log_buffer_size = srv_log_buffer_size / (UNIV_PAGE_SIZE / 1024);

	err = srv_read_init_val(initfile, "FLUSH_LOG_AT_TRX_COMMIT(1/0)", NULL,
					&srv_flush_log_at_trx_commit, FALSE);
	if (err != DB_SUCCESS) return(err);

	err = srv_read_init_val(initfile, "BUFFER_POOL_SIZE_MB", NULL,
						&srv_pool_size, FALSE);
	if (err != DB_SUCCESS) return(err);

	/* Megabytes to pages */
	srv_pool_size = srv_pool_size * ((1024 * 1024) / UNIV_PAGE_SIZE);

	err = srv_read_init_val(initfile, "ADDITIONAL_MEM_POOL_SIZE_MB", NULL,
						&srv_mem_pool_size, FALSE);
	if (err != DB_SUCCESS) return(err);

	/* Megabytes to bytes */
	srv_mem_pool_size = srv_mem_pool_size * 1024 * 1024;

	srv_lock_table_size = 20 * srv_pool_size;

	err = srv_read_init_val(initfile, "NUMBER_OF_FILE_IO_THREADS", NULL,
						&srv_n_file_io_threads, FALSE);
	if (err != DB_SUCCESS) return(err);

	/* Optional entry: read quietly, absence is not an error */
	err = srv_read_init_val(initfile, "SRV_RECOVER_FROM_BACKUP",
							NULL, NULL, TRUE);
	if (err == DB_SUCCESS) {
		srv_archive_recovery = TRUE;
		srv_archive_recovery_limit_lsn = ut_dulint_max;

		/* Both halves of the limit lsn must have been read before
		they are combined. The result of the first read used to be
		overwritten by the second, so val1 could be used
		uninitialized. */
		err = srv_read_init_val(initfile, NULL, NULL, &val1, TRUE);
		err2 = srv_read_init_val(initfile, NULL, NULL, &val2, TRUE);

		if (err == DB_SUCCESS && err2 == DB_SUCCESS) {
			srv_archive_recovery_limit_lsn =
					ut_dulint_create(val1, val2);
		}
	}

	/* err = srv_read_init_val(initfile,
				"SYNC_NUMBER_OF_SPIN_WAIT_ROUNDS", NULL,
				&srv_n_spin_wait_rounds);

	err = srv_read_init_val(initfile, "SYNC_SPIN_WAIT_DELAY", NULL,
						&srv_spin_wait_delay); */
	return(DB_SUCCESS);
}
/*************************************************************************
Reads keywords and values from an initfile and stores them in the srv_*
configuration globals, in the fixed order of the calls below. The original
header states that an error exits the process; no return value of
srv_read_init_val is checked here, so any such exit would have to happen
inside that function.
NOTE(review): these calls pass (initfile, flag, keyword, str_buf, num_val),
which does not match the parameter order of the srv_read_init_val
definition that precedes this function in the file, and this function has
the same name as the ulint-returning srv_read_initfile before it. This
looks like a leftover of an older interface; confirm before enabling. */
void
srv_read_initfile(
/*==============*/
FILE* initfile) /* in: file pointer */
{
char str_buf[10000];
ulint ulint_val;
/* The endpoint name is the only entry copied out of str_buf; it is
asserted to be shorter than COM_MAX_ADDR_LEN before the copy */
srv_read_init_val(initfile, FALSE, "SRV_ENDPOINT_NAME", str_buf,
&ulint_val);
ut_a(ut_strlen(str_buf) < COM_MAX_ADDR_LEN);
ut_memcpy(srv_endpoint_name, str_buf, COM_MAX_ADDR_LEN);
/* All remaining entries store their result directly in a global */
srv_read_init_val(initfile, TRUE, "SRV_N_COM_THREADS", str_buf,
&srv_n_com_threads);
srv_read_init_val(initfile, TRUE, "SRV_N_WORKER_THREADS", str_buf,
&srv_n_worker_threads);
srv_read_init_val(initfile, TRUE, "SYNC_N_SPIN_WAIT_ROUNDS", str_buf,
&srv_n_spin_wait_rounds);
srv_read_init_val(initfile, TRUE, "SYNC_SPIN_WAIT_DELAY", str_buf,
&srv_spin_wait_delay);
srv_read_init_val(initfile, TRUE, "THREAD_PRIORITY_BOOST", str_buf,
&srv_priority_boost);
srv_read_init_val(initfile, TRUE, "N_SPACES", str_buf, &srv_n_spaces);
srv_read_init_val(initfile, TRUE, "N_FILES", str_buf, &srv_n_files);
srv_read_init_val(initfile, TRUE, "FILE_SIZE", str_buf,
&srv_file_size);
srv_read_init_val(initfile, TRUE, "N_LOG_GROUPS", str_buf,
&srv_n_log_groups);
srv_read_init_val(initfile, TRUE, "N_LOG_FILES", str_buf,
&srv_n_log_files);
srv_read_init_val(initfile, TRUE, "LOG_FILE_SIZE", str_buf,
&srv_log_file_size);
srv_read_init_val(initfile, TRUE, "LOG_ARCHIVE_ON", str_buf,
&srv_log_archive_on);
srv_read_init_val(initfile, TRUE, "LOG_BUFFER_SIZE", str_buf,
&srv_log_buffer_size);
srv_read_init_val(initfile, TRUE, "FLUSH_LOG_AT_TRX_COMMIT", str_buf,
&srv_flush_log_at_trx_commit);
srv_read_init_val(initfile, TRUE, "POOL_SIZE", str_buf,
&srv_pool_size);
srv_read_init_val(initfile, TRUE, "MEM_POOL_SIZE", str_buf,
&srv_mem_pool_size);
srv_read_init_val(initfile, TRUE, "LOCK_TABLE_SIZE", str_buf,
&srv_lock_table_size);
srv_read_init_val(initfile, TRUE, "SIM_DISK_WAIT_PCT", str_buf,
&srv_sim_disk_wait_pct);
srv_read_init_val(initfile, TRUE, "SIM_DISK_WAIT_LEN", str_buf,
&srv_sim_disk_wait_len);
srv_read_init_val(initfile, TRUE, "SIM_DISK_WAIT_BY_YIELD", str_buf,
&srv_sim_disk_wait_by_yield);
srv_read_init_val(initfile, TRUE, "SIM_DISK_WAIT_BY_WAIT", str_buf,
&srv_sim_disk_wait_by_wait);
srv_read_init_val(initfile, TRUE, "MEASURE_CONTENTION", str_buf,
&srv_measure_contention);
srv_read_init_val(initfile, TRUE, "MEASURE_BY_SPIN", str_buf,
&srv_measure_by_spin);
srv_read_init_val(initfile, TRUE, "PRINT_THREAD_RELEASES", str_buf,
&srv_print_thread_releases);
/* Each of the next four PRINT_* settings, when nonzero, also turns on
a debug flag that is not an srv_* variable */
srv_read_init_val(initfile, TRUE, "PRINT_LOCK_WAITS", str_buf,
&srv_print_lock_waits);
if (srv_print_lock_waits) {
lock_print_waits = TRUE;
}
srv_read_init_val(initfile, TRUE, "PRINT_BUF_IO", str_buf,
&srv_print_buf_io);
if (srv_print_buf_io) {
buf_debug_prints = TRUE;
}
srv_read_init_val(initfile, TRUE, "PRINT_LOG_IO", str_buf,
&srv_print_log_io);
if (srv_print_log_io) {
log_debug_writes = TRUE;
}
srv_read_init_val(initfile, TRUE, "PRINT_PARSED_SQL", str_buf,
&srv_print_parsed_sql);
if (srv_print_parsed_sql) {
pars_print_lexed = TRUE;
}
srv_read_init_val(initfile, TRUE, "PRINT_LATCH_WAITS", str_buf,
&srv_print_latch_waits);
srv_read_init_val(initfile, TRUE, "TEST_EXTRA_MUTEXES", str_buf,
&srv_test_extra_mutexes);
srv_read_init_val(initfile, TRUE, "TEST_NOCACHE", str_buf,
&srv_test_nocache);
srv_read_init_val(initfile, TRUE, "TEST_CACHE_EVICT", str_buf,
&srv_test_cache_evict);
srv_read_init_val(initfile, TRUE, "TEST_SYNC", str_buf,
&srv_test_sync);
srv_read_init_val(initfile, TRUE, "TEST_N_THREADS", str_buf,
&srv_test_n_threads);
srv_read_init_val(initfile, TRUE, "TEST_N_LOOPS", str_buf,
&srv_test_n_loops);
srv_read_init_val(initfile, TRUE, "TEST_N_FREE_RNDS", str_buf,
&srv_test_n_free_rnds);
srv_read_init_val(initfile, TRUE, "TEST_N_RESERVED_RNDS", str_buf,
&srv_test_n_reserved_rnds);
srv_read_init_val(initfile, TRUE, "TEST_N_MUTEXES", str_buf,
&srv_test_n_mutexes);
srv_read_init_val(initfile, TRUE, "TEST_ARRAY_SIZE", str_buf,
&srv_test_array_size);
}
#endif
/*************************************************************************
Initializes the server. */
......
......@@ -1147,8 +1147,6 @@ trx_sig_send(
ut_a(0);
/* sess_raise_error_low(trx, 0, 0, NULL, NULL, NULL, NULL,
"Incompatible signal"); */
return(FALSE);
}
......@@ -1197,9 +1195,6 @@ trx_sig_send(
in the error state: */
ut_a(0);
sess_raise_error_low(trx, 0, 0, NULL, NULL, NULL, NULL,
(char *) "Signal from another session, or a break execution signal");
}
/* If there were no other signals ahead in the queue, try to start
......
......@@ -28,6 +28,13 @@ Created 6/25/1996 Heikki Tuuri
/* The session system global data structure; it starts out as NULL */
sess_sys_t* sess_sys = NULL;
/*************************************************************************
Forward declaration of the file-local function that closes a session,
freeing the memory occupied by it. The caller gives up ownership of the
session object. */
static
void
sess_close(
/*=======*/
sess_t* sess); /* in, own: session object */
/*************************************************************************
Communicates an error message to the client. If sess->client_waits is not
TRUE, puts the session to error state and does not try to send the error
......@@ -85,42 +92,6 @@ sess_cli_msg_set_sess(
mach_write_to_4(str + SESS_CLI_MSG_SESS_ID_CHECK, fold);
}
/*************************************************************************
Looks up the session that a message from a client is addressed to.
NOTE: the message is not trusted. It is rejected if it is too short to hold
the session id check field, or if that field differs from the fold value
computed from the session id carried in the message. */
static
sess_t*
sess_cli_msg_get_sess(
/*==================*/
			/* out: session, NULL if not found */
	byte*	str,	/* in: message string */
	ulint	len)	/* in: message string length */
{
	sess_t*	match;
	dulint	sess_id;
	ulint	id_fold;

	ut_ad(mutex_own(&kernel_mutex));

	if (len < SESS_CLI_MSG_SESS_ID_CHECK + 4) {
		/* The 4-byte check field does not fit in the message */

		return(NULL);
	}

	sess_id = mach_read_from_8(str + SESS_CLI_MSG_SESS_ID);
	id_fold = sess_id_fold(sess_id);

	if (mach_read_from_4(str + SESS_CLI_MSG_SESS_ID_CHECK) != id_fold) {
		/* The id and its check value do not agree */

		return(NULL);
	}

	/* Search the session hash table with the fold value, comparing the
	full session id of each candidate */

	HASH_SEARCH(hash, sess_sys->hash, id_fold, match,
				UT_DULINT_EQ(sess_id, match->id));

	return(match);
}
/***************************************************************************
Decrements the reference count of a session and closes it, if desired. */
UNIV_INLINE
......@@ -311,6 +282,7 @@ sess_open(
/*************************************************************************
Closes a session, freeing the memory occupied by it. */
static
void
sess_close(
/*=======*/
......@@ -595,330 +567,6 @@ sess_error_low(
NULL, NULL, NULL);
}
/*************************************************************************
Raises an SQL error. */
void
sess_raise_error_low(
/*=================*/
trx_t* trx, /* in: transaction */
ulint err_no, /* in: error number */
ulint type, /* in: more info of the error, or 0 */
dict_table_t* table, /* in: dictionary table or NULL */
dict_index_t* index, /* in: table index or NULL */
dtuple_t* tuple, /* in: tuple to insert or NULL */
rec_t* rec, /* in: record or NULL */
char* err_str)/* in: arbitrary null-terminated error string,
or NULL */
{
char* str;
ulint len;
ut_ad(mutex_own(&kernel_mutex));
str = mem_alloc(64000);
len = 0;
len += sprintf(str + len, "Error number: %lu", err_no);
if (type) {
len += sprintf(str + len, ", type: %lu", type);
}
if (table) {
len += sprintf(str + len, ", table: %s", table->name);
}
if (index) {
len += sprintf(str + len, ", index: %s", index->name);
}
if (tuple) {
len += sprintf(str + len, ", tuple:");
len += dtuple_sprintf(str + len, 8192, tuple);
}
if (rec) {
len += sprintf(str + len, ", record:");
len += rec_sprintf(str + len, 8192, rec);
}
if (err_str) {
len += sprintf(str + len, ", %s", err_str);
}
str[len] = '\0';
ut_a(len < 64000);
if (trx->sess) {
sess_error_low(trx->sess, err_no, str);
} else {
mem_free(str);
}
}
/***************************************************************************
Processes a client message which is part of a bigger message. The first part
allocates the reassembly buffer sess->big_msg, sized from the continuation
size announced in the message, and is copied there whole. For every later
part only the data after the SESS_CLI_MSG_DATA offset is appended.

A part that does not fit in the announced size, or that is shorter than the
message header, is rejected with SESS_ERR_MSG_CORRUPTED instead of being
copied: the lengths come from the client and must not be allowed to write
past the end of the buffer. */
static
ibool
sess_receive_msg_part(
/*==================*/
			/* out: TRUE if message completed */
	sess_t*	sess,	/* in: session */
	byte*	str,	/* in: message string */
	ulint	len)	/* in: message length */
{
	ulint	cont;
	ulint	announced;
	ulint	data_len;

	cont = sess_cli_msg_get_continue(str);

	ut_ad(cont != SESS_MSG_SINGLE_PART);

	if (cont == SESS_MSG_FIRST_PART) {
		if (sess->big_msg) {
			/* A previous big message was never completed */
			sess_error_low(sess, SESS_ERR_MSG_LOST, NULL);

			return(FALSE);
		}

		announced = 1024 * sess_cli_msg_get_cont_size(str);

		if (len > announced) {
			/* The first part alone is bigger than the announced
			total size */
			sess_error_low(sess, SESS_ERR_MSG_CORRUPTED, NULL);

			return(FALSE);
		}

		sess->big_msg_size = announced;
		sess->big_msg = mem_alloc(sess->big_msg_size);

		if (sess->big_msg == NULL) {
			sess_error_low(sess, SESS_ERR_OUT_OF_MEMORY, NULL);

			return(FALSE);
		}

		ut_memcpy(sess->big_msg, str, len);
		sess->big_msg_len = len;

		return(FALSE);
	}

	/* A middle or the last part */

	if (sess->big_msg == NULL) {
		/* No first part was received */
		sess_error_low(sess, SESS_ERR_MSG_LOST, NULL);

		return(FALSE);
	}

	if (len < SESS_CLI_MSG_DATA
	    || len - SESS_CLI_MSG_DATA
			> sess->big_msg_size - sess->big_msg_len) {

		/* The part has no complete header, or its data would
		overflow the buffer. Discard what was collected so far, so
		that a later part cannot complete a message with a hole in
		it. */
		mem_free(sess->big_msg);
		sess->big_msg = NULL;

		sess_error_low(sess, SESS_ERR_MSG_CORRUPTED, NULL);

		return(FALSE);
	}

	data_len = len - SESS_CLI_MSG_DATA;

	ut_memcpy(sess->big_msg + sess->big_msg_len,
					str + SESS_CLI_MSG_DATA, data_len);
	sess->big_msg_len += data_len;

	if (cont == SESS_MSG_MIDDLE_PART) {

		return(FALSE);
	}

	return(TRUE);
}
/***************************************************************************
Processes a client message which requires SQL parsing. This function decodes
the client message built in SQLPrepare: it parses the SQL text found at
cli_msg + SESS_CLI_MSG_DATA, appends the resulting query graph to
sess->graphs, and answers the client with the graph id followed by the query
parameter info. NOTE: The kernel mutex must be owned on entry and is
temporarily released within this function, both while the SQL is parsed and
while the parameter info is written. */
static
void
sess_receive_prepare(
/*=================*/
sess_t* sess, /* in: session */
byte* cli_msg,/* in: client message */
ulint len) /* in: message length; not read as an input: the variable is
reused below for the length of the parameter info */
{
dulint error_count;
que_t* graph;
byte msg[ODBC_DATAGRAM_SIZE];
UT_NOT_USED(len);
ut_ad(mutex_own(&kernel_mutex));
/* Remember the error count so that an error raised on the session while
the kernel mutex is released can be detected afterwards */
error_count = sess->error_count;
/* Make sure the session object is not freed during the parsing */
sess_refer_count_inc(sess);
/* We release the kernel mutex before parsing the command: this is
to reduce contention on the kernel mutex */
mutex_exit(&kernel_mutex);
/* printf("To parse query %s\n", (char*)(cli_msg + SESS_CLI_MSG_DATA)); */
graph = pars_sql((char*)(cli_msg + SESS_CLI_MSG_DATA));
mutex_enter(&kernel_mutex);
if (graph == NULL) {
/* Error in parsing: report it on the session and drop our reference.
NOTE(review): ut_error presumably stops the server, which would make the
return after it unreachable - confirm */
sess_error_low(sess, SESS_ERR_SQL_ERROR, NULL);
sess_refer_count_dec(sess);
ut_error;
return;
}
if (!UT_DULINT_EQ(error_count, sess->error_count)) {
/* An error, or an asynchronous signal on the session happened
when the kernel mutex was not reserved: discard graph */
graph->state = QUE_FORK_INVALID;
que_graph_try_free(graph);
sess_refer_count_dec(sess);
ut_error;
return;
}
/* Attach the graph to the session and give it the next free graph id */
UT_LIST_ADD_LAST(graphs, sess->graphs, graph);
graph->id = sess->next_graph_id;
sess->next_graph_id++;
/* Tell the client that the preparation succeeded and communicate info
about the possible query parameters: the message will be decoded in
SQLPrepare */
ut_ad(sess->client_waits);
sess_srv_msg_init(sess, msg, SESS_SRV_SUCCESS);
/* The reply data starts with the 4-byte graph id */
mach_write_to_4(msg + SESS_SRV_MSG_DATA, graph->id);
mutex_exit(&kernel_mutex);
/* The parameter info is written right after the graph id.
NOTE(review): no buffer size is passed here; confirm that the info cannot
exceed the room left in msg */
len = pars_write_query_param_info(msg + SESS_SRV_MSG_DATA + 4, graph);
mutex_enter(&kernel_mutex);
/* NOTE(review): SESS_RELEASE_KERNEL presumably tells the send function how
to treat the kernel mutex - confirm against its definition */
sess_srv_msg_send(sess, msg, SESS_SRV_MSG_DATA + 4 + len,
SESS_RELEASE_KERNEL);
sess_refer_count_dec(sess);
}
/***************************************************************************
Processes a client message which does not require SQL parsing. This function
decodes the client message built in SQLExecute: it reads a 4-byte statement
id from the message, finds the query graph with that id among the graphs of
the session, and then either runs a stored procedure call or adds a newly
created procedure to the dictionary cache. The kernel mutex must be owned on
entry; it is released and re-acquired inside this function. */
static
void
sess_receive_command(
/*=================*/
sess_t* sess, /* in: session */
byte* cli_msg,/* in: client message */
ulint len, /* in: message length; not used */
ulint type) /* in: message type; not used */
{
proc_node_t* proc_node;
call_node_t* call_node;
dict_proc_t* dict_proc;
que_thr_t* thr;
que_t* graph;
ulint stat_id;
UT_NOT_USED(len);
UT_NOT_USED(type);
ut_ad(mutex_own(&kernel_mutex));
sess->client_waits = TRUE;
/* The statement id is stored in the first 4 bytes of the message data */
stat_id = mach_read_from_4(cli_msg + SESS_CLI_MSG_DATA);
/* Look for the statement from the list of query graphs */
graph = UT_LIST_GET_FIRST(sess->graphs);
while (graph != NULL) {
if (graph->id == stat_id) {
break;
}
graph = UT_LIST_GET_NEXT(graphs, graph);
}
if (graph == NULL) {
/* Could not find the right graph: error */
sess_error_low(sess, SESS_ERR_STMT_NOT_FOUND, NULL);
return;
}
/* Only a graph in the command wait state is accepted for execution */
if (graph->state != QUE_FORK_COMMAND_WAIT) {
sess_error_low(sess, SESS_ERR_STMT_NOT_READY, NULL);
return;
}
/* printf("To execute stat %lu\n", stat_id); */
if (graph->fork_type == QUE_FORK_PROCEDURE_CALL) {
/* It is a stored procedure call: retrieve a parsed copy of
the procedure from the dictionary cache */
mutex_exit(&kernel_mutex);
call_node = que_fork_get_child(graph);
/* From here on graph refers to the reserved copy of the procedure, not to
the call graph found above.
NOTE(review): the returned copy is used without a NULL check; confirm that
the reserve function cannot fail */
graph = dict_procedure_reserve_parsed_copy(
call_node->procedure_def);
graph->trx = sess->trx;
/* Retrieve the procedure input parameters from the message; they follow
the 4-byte statement id */
pars_proc_read_input_params_from_buf(graph,
cli_msg + SESS_CLI_MSG_DATA + 4);
mutex_enter(&kernel_mutex);
} else {
/* It is a create procedure command: add the procedure to the
dictionary cache */
ut_ad(graph->fork_type == QUE_FORK_PROCEDURE);
mutex_exit(&kernel_mutex);
proc_node = que_fork_get_child(graph);
dict_proc = dict_mem_procedure_create(proc_node->proc_id->name,
proc_node->sym_tab->sql_string,
graph);
dict_procedure_add_to_cache(dict_proc);
mutex_enter(&kernel_mutex);
/* Nothing is executed for a create command: just report success */
sess_srv_msg_send_simple(sess, SESS_SRV_SUCCESS,
SESS_RELEASE_KERNEL);
return;
}
/* Choose a query thread for execution */
thr = que_fork_start_command(graph, SESS_COMM_EXECUTE, 0);
ut_ad(thr);
sess->trx->graph = graph;
mutex_exit(&kernel_mutex);
/* Run query threads with the kernel mutex released */
que_run_threads(thr);
/* Re-acquire the kernel mutex, so it is owned again when we return */
mutex_enter(&kernel_mutex);
}
/***************************************************************************
When a command has been completed, this function sends the message about it
to the client. */
......@@ -936,239 +584,3 @@ sess_command_completed_message(
SESS_RELEASE_KERNEL);
mutex_exit(&kernel_mutex);
}
/***************************************************************************
Handles a break message sent by the client by raising the
SESS_ERR_BREAK_BY_CLIENT error on the session. The kernel mutex must be
owned by the caller. */
static
void
sess_receive_break(
/*===============*/
	sess_t*	sess)	/* in: session */
{
	ut_ad(mutex_own(&kernel_mutex));

	/* Raising the error is what rolls back the latest incomplete SQL
	statement */

	sess_error_low(sess, SESS_ERR_BREAK_BY_CLIENT, NULL);
}
/***************************************************************************
Processes a message from a client that is addressed to an existing session.
The message is validated (consistency, message number, whether the client
was allowed to send now) and then dispatched either as a prepare message or
as a command. The kernel mutex must be owned on entry.
NOTE: Releases the kernel mutex temporarily when parsing an SQL string. */

void
sess_receive_msg_rel_kernel(
/*========================*/
sess_t* sess, /* in: session */
byte* str, /* in: message string */
ulint len) /* in: message length */
{
dulint msg_no;
ulint msg_type;
ulint cont;
ibool is_big_msg = FALSE;
ibool client_waited;
ut_ad(mutex_own(&kernel_mutex));
ut_ad(!sess->disconnecting);
/* Remember whether the client was already waiting for a reply before this
message arrived; the flag is then set for this message */
client_waited = sess->client_waits;
sess->client_waits = TRUE;
if (sess->state == SESS_ERROR) {
/* Send a buffered error message */
sess_srv_msg_send_error(sess);
return;
}
if (FALSE == sess_cli_msg_check_consistency(str, len)) {
/* Message from the client was corrupted */
sess_error_low(sess, SESS_ERR_MSG_CORRUPTED, NULL);
return;
}
/* The message number must equal the incremented count of messages
received so far; otherwise a message is reported lost and the counter is
set to the number the client sent */
msg_no = sess_cli_msg_get_msg_no(str);
UT_DULINT_INC(sess->msgs_recv);
if (!UT_DULINT_EQ(msg_no, sess->msgs_recv)) {
sess_error_low(sess, SESS_ERR_MSG_LOST, NULL);
sess->msgs_recv = msg_no;
return;
}
msg_type = sess_cli_msg_get_type(str);
/* A break message is handled before the extraneous message check below,
so it is accepted also when the client was already waiting */
if (msg_type == SESS_CLI_BREAK_EXECUTION) {
sess_receive_break(sess);
return;
}
if (client_waited) {
/* Client sent an extraneous message which is not a break
command: an error */
sess_error_low(sess, SESS_ERR_EXTRANEOUS_MSG, NULL);
return;
}
/*-----------------------------------------------------------*/
/* Handle big messages */
cont = sess_cli_msg_get_continue(str);
if (cont == SESS_MSG_SINGLE_PART) {
if (sess->big_msg) {
/* A multipart message was started but never completed */
sess_error_low(sess, SESS_ERR_MSG_LOST, NULL);
return;
}
} else {
/* Multipart messages are marked as not in use: the ut_error below is
reached before any part is processed */
ut_error; /* Not in use */
is_big_msg = sess_receive_msg_part(sess, str, len);
if (is_big_msg) {
/* The last part arrived: continue with the reassembled message and detach
its buffer from the session; it is freed at the end of this function */
str = sess->big_msg;
len = sess->big_msg_len;
sess->big_msg = NULL;
} else {
return;
}
}
/*-----------------------------------------------------------*/
/* The session has received a complete message from the client */
ut_ad(!UT_LIST_GET_FIRST((sess->trx)->signals));
if (msg_type == SESS_CLI_PREPARE) {
/* Note that the kernel mutex is temporarily released when
the SQL string is parsed */
sess_receive_prepare(sess, str, len);
} else {
/* Note that the kernel mutex is temporarily released when the
command is executed */
sess_receive_command(sess, str, len, msg_type);
}
if (is_big_msg) {
/* str points to the reassembled message buffer here, not to the caller's
message */
mem_free(str);
}
}
/***********************************************************************
Opens a new connection and creates a session for it, provided that the
message is a connect message: its session id must be zero and its type must
be SESS_CLI_CONNECT. On success an accept message is sent to the client. */
static
ibool
sess_open_connection(
/*=================*/
			/* out: TRUE if a session was opened, FALSE if
			the message is not a valid connect message */
	byte*	str,	/* in: message string */
	ulint	len,	/* in: string length */
	byte*	addr,	/* in: user address string */
	ulint	alen)	/* in: user address length */
{
	sess_t*	new_sess;
	dulint	id_in_msg;

	id_in_msg = mach_read_from_8(str + SESS_CLI_MSG_SESS_ID);

	if (UT_DULINT_EQ(id_in_msg, ut_dulint_zero)
	    && sess_cli_msg_get_type(str) == SESS_CLI_CONNECT) {

		/* A connect message carries no data after the header */
		ut_a(len == SESS_CLI_MSG_DATA);

		new_sess = sess_open(srv_sys->endpoint, addr, alen);

		sess_srv_msg_send_simple(new_sess, SESS_SRV_ACCEPT_CONNECT,
						SESS_NOT_RELEASE_KERNEL);
		return(TRUE);
	}

	/* It is not a valid connect message */

	return(FALSE);
}
/***********************************************************************
Entry point for a message received from a client. If the message is
addressed to an existing session, it is passed on to that session; otherwise
it is tried as a connect message that opens a new connection and session.
This is called by a SRV_COM thread. The kernel mutex is acquired here and
released before returning. */

void
sess_process_cli_msg(
/*=================*/
	byte*	str,	/* in: message string */
	ulint	len,	/* in: string length */
	byte*	addr,	/* in: address string */
	ulint	alen)	/* in: address length */
{
	sess_t*	target;

	UT_NOT_USED(addr);
	UT_NOT_USED(alen);

	mutex_enter(&kernel_mutex);

	target = sess_cli_msg_get_sess(str, len);

	if (target != NULL) {
		if (target->disconnecting) {
			/* A message was sent to a disconnecting session;
			no entry is written to the error log */
			ut_error;
		} else {
			sess_receive_msg_rel_kernel(target, str, len);
		}
	} else if (!sess_cli_msg_check_consistency(str, len)
		   || !sess_open_connection(str, len, addr, alen)) {

		/* There was no matching session, and the message is either
		inconsistent or not a valid connect message: we could not
		make sense of it. No entry is written to the error log. */
		ut_error;
	}

	mutex_exit(&kernel_mutex);
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment