Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
M
mariadb
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
mariadb
Commits
ec06c782
Commit
ec06c782
authored
Feb 19, 2004
by
marko@hundin.mysql.fi
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Remove unnecessary files and functions
parent
38b46781
Changes
8
Show whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
8 additions
and
7030 deletions
+8
-7030
innobase/fsp/trash/FSP0FSP.C
innobase/fsp/trash/FSP0FSP.C
+0
-3100
innobase/include/usr0sess.h
innobase/include/usr0sess.h
+0
-43
innobase/lock/lock0lock.c
innobase/lock/lock0lock.c
+0
-5
innobase/log/trash/log0trsh.c
innobase/log/trash/log0trsh.c
+0
-648
innobase/os/os0fileold.c
innobase/os/os0fileold.c
+0
-1956
innobase/srv/srv0srv.c
innobase/srv/srv0srv.c
+0
-677
innobase/trx/trx0trx.c
innobase/trx/trx0trx.c
+0
-5
innobase/usr/usr0sess.c
innobase/usr/usr0sess.c
+8
-596
No files found.
innobase/fsp/trash/FSP0FSP.C
deleted
100644 → 0
View file @
38b46781
/**********************************************************************
File-space management
(c) 1995 Innobase Oy
Created 11/29/1995 Heikki Tuuri
***********************************************************************/
#include "fsp0fsp.h"
#include "buf0buf.h"
#include "fil0fil.h"
#include "sync0sync.h"
#include "mtr0log.h"
#include "fut0fut.h"
#include "ut0byte.h"
/* Every on-disk structure is handled as a plain byte string; the aliases
below only name which structure a byte pointer is meant to address. */
typedef byte	fsp_header_t;
typedef byte	xdes_t;
typedef byte	fseg_page_header_t;

/* The rw-latch which protects the whole file space system */
rw_lock_t	fsp_latch;
/*			SPACE HEADER
			============

File space header data structure: this data structure is contained in the
first page of a space. The space for this header is reserved in every extent
descriptor page, but used only in the first. */

#define FSP_HEADER_OFFSET	FIL_PAGE_DATA
					/* Offset of the space header
					within a file page */
/*-------------------------------------*/
#define FSP_SIZE		0	/* Current size of the space in
					pages */
#define FSP_FREE_LIMIT		4	/* Minimum page number for which
					the free list has not been
					initialized: the pages >= this
					limit are, by definition, free */
#define FSP_LOWEST_NO_WRITE	8	/* The lowest page offset for which
					the page has not been written to
					disk (if it has been written, we
					know that the OS has really reserved
					the physical space for the page) */
#define FSP_FRAG_N_USED		12	/* number of used pages in the
					FSP_FREE_FRAG list */
#define FSP_FREE		16	/* list of free extents */
#define FSP_FREE_FRAG		(16 + FLST_BASE_NODE_SIZE)
					/* list of partially free extents not
					belonging to any segment */
#define FSP_FULL_FRAG		(16 + 2 * FLST_BASE_NODE_SIZE)
					/* list of full extents not belonging
					to any segment */
#define FSP_SEG_ID		(16 + 3 * FLST_BASE_NODE_SIZE)
					/* 8 bytes which give the first
					(presumably: unused) segment id;
					fsp_header_init sets the field to
					ut_dulint_create(0, 1). NOTE: this
					comment has to be closed here, or it
					swallows the next #define */
#define FSP_SEG_HDRS_FULL	(24 + 3 * FLST_BASE_NODE_SIZE)
					/* list of pages containing segment
					headers, where all the segment header
					slots are reserved */
#define FSP_SEG_HDRS_FREE	(24 + 4 * FLST_BASE_NODE_SIZE)
					/* list of pages containing segment
					headers, where not all the segment
					header slots are reserved */
/*-------------------------------------*/
/* File space header size. The last field, the FSP_SEG_HDRS_FREE list base
node, starts at 24 + 4 * FLST_BASE_NODE_SIZE and occupies one more
FLST_BASE_NODE_SIZE, so the header ends at 24 + 5 * FLST_BASE_NODE_SIZE.
The extent descriptor array (XDES_ARR_OFFSET) begins right after the header
and must not overlap that list base node. */

#define FSP_HEADER_SIZE		(24 + 5 * FLST_BASE_NODE_SIZE)

#define FSP_FREE_ADD		4	/* this many free extents are added
					to the free list from above
					FSP_FREE_LIMIT at a time */
/*			SEGMENT HEADER
			==============

Segment header which is created for each segment in a tablespace, on a
page of its own. NOTE: in purge we assume that a segment having only one
currently used page can be freed in a few steps, so that the freeing cannot
fill the file buffer with bufferfixed file pages. */

#define FSEG_HDR_PAGE_NODE	FSEG_PAGE_DATA
					/* the list node for linking
					segment header pages */

#define FSEG_ARR_OFFSET		(FSEG_PAGE_DATA + FLST_NODE_SIZE)
/*-------------------------------------*/
#define FSEG_ID			0	/* 8 bytes of segment id: if this is
					ut_dulint_zero, it means that the
					header is unused */
#define FSEG_NOT_FULL_N_USED	8
					/* number of used segment pages in
					the FSEG_NOT_FULL list */
#define FSEG_FREE		12
					/* list of free extents of this
					segment */
#define FSEG_NOT_FULL		(12 + FLST_BASE_NODE_SIZE)
					/* list of partially free extents */
#define FSEG_FULL		(12 + 2 * FLST_BASE_NODE_SIZE)
					/* list of full extents */
#define FSEG_MAGIC_N		(12 + 3 * FLST_BASE_NODE_SIZE)
					/* magic number used in debugging */
#define FSEG_FRAG_ARR		(16 + 3 * FLST_BASE_NODE_SIZE)
					/* array of individual pages
					belonging to this segment in fsp
					fragment extent lists */
#define FSEG_FRAG_ARR_N_SLOTS	(FSP_EXTENT_SIZE / 2)
					/* number of slots in the array for
					the fragment pages */
#define FSEG_FRAG_SLOT_SIZE	4	/* a fragment page slot contains its
					page number within space, FIL_NULL
					means that the slot is not in use */
/*-------------------------------------*/
#define FSEG_HEADER_SIZE	(16 + 3 * FLST_BASE_NODE_SIZE +\
				 FSEG_FRAG_ARR_N_SLOTS * FSEG_FRAG_SLOT_SIZE)

#define FSP_SEG_HDRS_PER_PAGE	((UNIV_PAGE_SIZE - FSEG_ARR_OFFSET - 10)\
				 / FSEG_HEADER_SIZE)
				/* Number of segment headers which fit on a
				single page */

#define FSEG_MAGIC_N_VALUE	97937874

#define	FSEG_FILLFACTOR		8	/* If this value is x, then if
					the number of unused but reserved
					pages in a segment is less than
					reserved pages * 1/x, and there are
					at least FSEG_FRAG_LIMIT used pages,
					then we allow a new empty extent to
					be added to the segment in
					fseg_alloc_free_page. Otherwise, we
					use unused pages of the segment. */

/* The limit is the number of fragment page slots of a segment header; the
macro must name FSEG_FRAG_ARR_N_SLOTS, which is the slot count defined
above. */
#define FSEG_FRAG_LIMIT		FSEG_FRAG_ARR_N_SLOTS
					/* If the segment has >= this many
					used pages, it may be expanded by
					allocating extents to the segment;
					until that only individual fragment
					pages are allocated from the space */

#define	FSEG_FREE_LIST_LIMIT	40	/* If the reserved size of a segment
					is at least this many extents, we
					allow extents to be put to the free
					list of the extent: at most
					FSEG_FREE_LIST_MAX_LEN many */
#define	FSEG_FREE_LIST_MAX_LEN	4
/*			EXTENT DESCRIPTOR
			=================

An extent descriptor is stored in the file for every extent. It carries bits
telling which pages of the extent are free and which contain old tuple
versions to clean. */

/*-------------------------------------*/
/* Identifier of the segment to which the extent belongs */
#define	XDES_ID			0
/* List node data structure linking the descriptors */
#define XDES_FLST_NODE		8
/* State information of the extent */
#define	XDES_STATE		(FLST_NODE_SIZE + 8)
/* Descriptor bitmap of the pages in the extent */
#define XDES_BITMAP		(FLST_NODE_SIZE + 12)
/*-------------------------------------*/

/* Number of bitmap bits kept for each page */
#define	XDES_BITS_PER_PAGE	2
/* Index of the bit which tells if the page is free */
#define	XDES_FREE_BIT		0
/* Index of the bit which tells if there are old versions of tuples on the
page */
#define	XDES_CLEAN_BIT		1

/* States of a descriptor */
#define	XDES_FREE		1	/* in the free list of the space */
#define	XDES_FREE_FRAG		2	/* in the free fragment list of the
					space */
#define	XDES_FULL_FRAG		3	/* in the full fragment list of the
					space */
#define	XDES_FSEG		4	/* belongs to a segment */

/* Number of pages described in a single descriptor page: currently each
page description takes less than 1 byte. */
#define	XDES_DESCRIBED_PER_PAGE		UNIV_PAGE_SIZE

/* Size of a file extent descriptor in bytes. The "+ 7) / 8" part rounds the
byte count of the bitmap upward. */
#define	XDES_SIZE	(XDES_BITMAP +\
			(FSP_EXTENT_SIZE * XDES_BITS_PER_PAGE + 7) / 8)

/* Offset of the descriptor array on a descriptor page */
#define	XDES_ARR_OFFSET		(FSP_HEADER_OFFSET + FSP_HEADER_SIZE)
/* Forward declarations of the static functions which are referenced before
their definitions. */

/**************************************************************************
Returns an extent to the free list of a space. */
static
void
fsp_free_extent(
/*============*/
	ulint	space,	/* in: space id */
	ulint	page,	/* in: page offset in the extent */
	mtr_t*	mtr);	/* in: mtr */

/**************************************************************************
Frees an extent of a segment to the space free list. */
static
void
fseg_free_extent(
/*=============*/
	fseg_header_t*	seg_header,	/* in: segment header */
	ulint		space,		/* in: space id */
	ulint		page,		/* in: page offset in the extent */
	mtr_t*		mtr);		/* in: mtr handle */

/**************************************************************************
Calculates the number of pages reserved by a segment, and how
many pages are currently used. */
static
ulint
fseg_n_reserved_pages_low(
/*======================*/
				/* out: number of reserved pages */
	fseg_header_t*	header,	/* in: segment header */
	ulint*		used,	/* out: number of pages used (<= reserved) */
	mtr_t*		mtr);	/* in: mtr handle */

/************************************************************************
Marks a page used. The page must reside within the extents of the given
segment. */
static
void
fseg_mark_page_used(
/*================*/
	fseg_header_t*	seg_header,	/* in: segment header */
	ulint		space,		/* in: space id */
	ulint		page,		/* in: page offset */
	mtr_t*		mtr);		/* in: mtr */

/**************************************************************************
Frees a single page of a segment. */
static
void
fseg_free_page_low(
/*===============*/
	fseg_header_t*	seg_header,	/* in: segment header */
	ulint		space,		/* in: space id */
	ulint		page,		/* in: page offset */
	mtr_t*		mtr);		/* in: mtr handle */

/**************************************************************************
Returns the first extent descriptor for a segment. We think of the extent
lists of the segment catenated in the order FSEG_FULL -> FSEG_NOT_FULL
-> FSEG_FREE. */
static
xdes_t*
fseg_get_first_extent(
/*==================*/
				/* out: the first extent descriptor, or NULL
				if none */
	fseg_header_t*	header,	/* in: segment header */
	mtr_t*		mtr);	/* in: mtr */

/**************************************************************************
Puts new extents to the free list if there are free extents above the free
limit. If an extent happens to contain an extent descriptor page, the extent
is put to the FSP_FREE_FRAG list with the page marked as used. */
static
void
fsp_fill_free_list(
/*===============*/
	ulint		space,	/* in: space */
	fsp_header_t*	header,	/* in: space header */
	mtr_t*		mtr);	/* in: mtr */
/**************************************************************************
Reads one descriptor bit of a page from the bitmap of an extent
descriptor. */
UNIV_INLINE
bool
xdes_get_bit(
/*=========*/
			/* out: value of the requested bit; for
			XDES_FREE_BIT, TRUE means that the page is free */
	xdes_t*	descr,	/* in: descriptor */
	ulint	bit,	/* in: XDES_FREE_BIT or XDES_CLEAN_BIT */
	ulint	offset,	/* in: page offset within extent:
			0 ... FSP_EXTENT_SIZE - 1 */
	mtr_t*	mtr)	/* in: mtr */
{
	ulint	bit_pos;
	ulint	bitmap_byte;

	ut_ad(mtr_memo_contains(mtr, buf_block_align(descr),
						MTR_MEMO_PAGE_X_LOCK));
	ut_ad((bit == XDES_FREE_BIT) || (bit == XDES_CLEAN_BIT));
	ut_ad(offset < FSP_EXTENT_SIZE);

	/* Position of the bit counted from the start of the bitmap */
	bit_pos = bit + XDES_BITS_PER_PAGE * offset;

	bitmap_byte = mtr_read_ulint(descr + XDES_BITMAP + bit_pos / 8,
							MLOG_1BYTE, mtr);

	return(ut_bit_get_nth(bitmap_byte, bit_pos % 8));
}
/**************************************************************************
Writes one descriptor bit of a page: the bitmap byte holding the bit is
read, modified and written back through the mini-transaction log. */
UNIV_INLINE
void
xdes_set_bit(
/*=========*/
	xdes_t*	descr,	/* in: descriptor */
	ulint	bit,	/* in: XDES_FREE_BIT or XDES_CLEAN_BIT */
	ulint	offset,	/* in: page offset within extent:
			0 ... FSP_EXTENT_SIZE - 1 */
	bool	val,	/* in: bit value */
	mtr_t*	mtr)	/* in: mtr */
{
	ulint	bit_pos;
	byte*	byte_ptr;
	ulint	old_byte;
	ulint	new_byte;

	ut_ad(mtr_memo_contains(mtr, buf_block_align(descr),
						MTR_MEMO_PAGE_X_LOCK));
	ut_ad((bit == XDES_FREE_BIT) || (bit == XDES_CLEAN_BIT));
	ut_ad(offset < FSP_EXTENT_SIZE);

	/* Position of the bit counted from the start of the bitmap */
	bit_pos = bit + XDES_BITS_PER_PAGE * offset;

	byte_ptr = descr + XDES_BITMAP + bit_pos / 8;

	old_byte = mtr_read_ulint(byte_ptr, MLOG_1BYTE, mtr);
	new_byte = ut_bit_set_nth(old_byte, bit_pos % 8, val);

	mlog_write_ulint(byte_ptr, new_byte, MLOG_1BYTE, mtr);
}
/**************************************************************************
Searches an extent descriptor for a page whose descriptor bit has the
desired value. The scan goes upward from hint to the end of the extent and
then continues from the start of the extent up to hint. */
UNIV_INLINE
ulint
xdes_find_bit(
/*==========*/
			/* out: page offset within the extent of the first
			match, ULINT_UNDEFINED if not found */
	xdes_t*	descr,	/* in: descriptor */
	ulint	bit,	/* in: XDES_FREE_BIT or XDES_CLEAN_BIT */
	bool	val,	/* in: desired bit value */
	ulint	hint,	/* in: page offset within the extent where the
			scan starts */
	mtr_t*	mtr)	/* in: mtr */
{
	ulint	pos;

	ut_ad(descr && mtr);
	ut_ad(val <= TRUE);
	ut_ad(hint < FSP_EXTENT_SIZE);
	ut_ad(mtr_memo_contains(mtr, buf_block_align(descr),
						MTR_MEMO_PAGE_X_LOCK));

	/* First pass: hint ... FSP_EXTENT_SIZE - 1 */
	pos = hint;

	while (pos < FSP_EXTENT_SIZE) {
		if (val == xdes_get_bit(descr, bit, pos, mtr)) {

			return(pos);
		}

		pos++;
	}

	/* Second pass, wrapped around: 0 ... hint - 1 */
	pos = 0;

	while (pos < hint) {
		if (val == xdes_get_bit(descr, bit, pos, mtr)) {

			return(pos);
		}

		pos++;
	}

	return(ULINT_UNDEFINED);
}
/**************************************************************************
Searches an extent descriptor for a page whose descriptor bit has the
desired value, scanning in the direction opposite to xdes_find_bit: downward
from hint to the start of the extent, then from the end of the extent down
to hint + 1. */
UNIV_INLINE
ulint
xdes_find_bit_downward(
/*===================*/
			/* out: page offset within the extent of the first
			match, ULINT_UNDEFINED if not found */
	xdes_t*	descr,	/* in: descriptor */
	ulint	bit,	/* in: XDES_FREE_BIT or XDES_CLEAN_BIT */
	bool	val,	/* in: desired bit value */
	ulint	hint,	/* in: page offset within the extent where the
			scan starts */
	mtr_t*	mtr)	/* in: mtr */
{
	ulint	pos;

	ut_ad(descr && mtr);
	ut_ad(val <= TRUE);
	ut_ad(hint < FSP_EXTENT_SIZE);
	ut_ad(mtr_memo_contains(mtr, buf_block_align(descr),
						MTR_MEMO_PAGE_X_LOCK));

	/* First pass: hint, hint - 1, ..., 0. The counter is unsigned, so
	it is tested before the decrement to avoid comparing below zero. */
	pos = hint + 1;

	while (pos-- > 0) {
		if (val == xdes_get_bit(descr, bit, pos, mtr)) {

			return(pos);
		}
	}

	/* Second pass: FSP_EXTENT_SIZE - 1, ..., hint + 1 */
	pos = FSP_EXTENT_SIZE - 1;

	while (pos > hint) {
		if (val == xdes_get_bit(descr, bit, pos, mtr)) {

			return(pos);
		}

		pos--;
	}

	return(ULINT_UNDEFINED);
}
/**************************************************************************
Counts the pages of an extent whose XDES_FREE_BIT is FALSE, that is, the
pages which are in use. */
UNIV_INLINE
ulint
xdes_get_n_used(
/*============*/
			/* out: number of pages used */
	xdes_t*	descr,	/* in: descriptor */
	mtr_t*	mtr)	/* in: mtr */
{
	ulint	n_used	= 0;
	ulint	page_i	= 0;

	ut_ad(descr && mtr);
	ut_ad(mtr_memo_contains(mtr, buf_block_align(descr),
						MTR_MEMO_PAGE_X_LOCK));

	while (page_i < FSP_EXTENT_SIZE) {
		if (xdes_get_bit(descr, XDES_FREE_BIT, page_i, mtr)
								== FALSE) {
			n_used++;
		}

		page_i++;
	}

	return(n_used);
}
/**************************************************************************
Tells whether an extent has no used pages at all. */
UNIV_INLINE
bool
xdes_is_free(
/*=========*/
			/* out: TRUE if totally free */
	xdes_t*	descr,	/* in: descriptor */
	mtr_t*	mtr)	/* in: mtr */
{
	ulint	n_used;

	n_used = xdes_get_n_used(descr, mtr);

	return((n_used == 0) ? TRUE : FALSE);
}
/**************************************************************************
Tells whether every page of an extent is in use. */
UNIV_INLINE
bool
xdes_is_full(
/*=========*/
			/* out: TRUE if full */
	xdes_t*	descr,	/* in: descriptor */
	mtr_t*	mtr)	/* in: mtr */
{
	ulint	n_used;

	n_used = xdes_get_n_used(descr, mtr);

	return((n_used == FSP_EXTENT_SIZE) ? TRUE : FALSE);
}
/**************************************************************************
Writes the state field of an extent descriptor through the mini-transaction
log. */
UNIV_INLINE
void
xdes_set_state(
/*===========*/
	xdes_t*	descr,	/* in: descriptor */
	ulint	state,	/* in: state to set: XDES_FREE ... XDES_FSEG */
	mtr_t*	mtr)	/* in: mtr handle */
{
	byte*	state_field;

	ut_ad(descr && mtr);
	ut_ad(state >= XDES_FREE);
	ut_ad(state <= XDES_FSEG);
	ut_ad(mtr_memo_contains(mtr, buf_block_align(descr),
						MTR_MEMO_PAGE_X_LOCK));

	state_field = descr + XDES_STATE;

	mlog_write_ulint(state_field, state, MLOG_4BYTES, mtr);
}
/**************************************************************************
Reads the 4-byte state field of an extent descriptor. */
UNIV_INLINE
ulint
xdes_get_state(
/*===========*/
			/* out: state */
	xdes_t*	descr,	/* in: descriptor */
	mtr_t*	mtr)	/* in: mtr handle */
{
	ulint	state;

	ut_ad(descr && mtr);
	ut_ad(mtr_memo_contains(mtr, buf_block_align(descr),
						MTR_MEMO_PAGE_X_LOCK));

	state = mtr_read_ulint(descr + XDES_STATE, MLOG_4BYTES, mtr);

	return(state);
}
/**************************************************************************
Initializes an extent descriptor: the free bit and the clean bit of every
page are set to TRUE and the state of the extent is set to XDES_FREE. */
UNIV_INLINE
void
xdes_init(
/*======*/
	xdes_t*	descr,	/* in: descriptor */
	mtr_t*	mtr)	/* in: mtr */
{
	ulint	page_i	= 0;

	ut_ad(descr && mtr);
	ut_ad(mtr_memo_contains(mtr, buf_block_align(descr),
						MTR_MEMO_PAGE_X_LOCK));

	while (page_i < FSP_EXTENT_SIZE) {
		xdes_set_bit(descr, XDES_FREE_BIT, page_i, TRUE, mtr);
		xdes_set_bit(descr, XDES_CLEAN_BIT, page_i, TRUE, mtr);

		page_i++;
	}

	xdes_set_state(descr, XDES_FREE, mtr);
}
/************************************************************************
Computes the page number of the descriptor page which holds the extent
descriptor of a given page: the offset is rounded with ut_2pow_round to a
multiple of XDES_DESCRIBED_PER_PAGE. */
UNIV_INLINE
ulint
xdes_calc_descriptor_page(
/*======================*/
				/* out: descriptor page offset */
	ulint	offset)		/* in: page offset */
{
	ulint	descr_page_no;

	/* The whole descriptor array must fit on a single page */
	ut_ad(UNIV_PAGE_SIZE > XDES_ARR_OFFSET
		+ (XDES_DESCRIBED_PER_PAGE / FSP_EXTENT_SIZE) * XDES_SIZE);

	descr_page_no = ut_2pow_round(offset, XDES_DESCRIBED_PER_PAGE);

	return(descr_page_no);
}
/************************************************************************
Computes the index of the extent descriptor of a page within the descriptor
array of its descriptor page. */
UNIV_INLINE
ulint
xdes_calc_descriptor_index(
/*=======================*/
				/* out: descriptor index */
	ulint	offset)		/* in: page offset */
{
	ulint	offset_in_group;

	/* Offset of the page counted from its descriptor page */
	offset_in_group = ut_2pow_remainder(offset, XDES_DESCRIBED_PER_PAGE);

	return(offset_in_group / FSP_EXTENT_SIZE);
}
/************************************************************************
Returns a pointer to the extent descriptor of a page. If the descriptor is
not on the space header page, the page holding it is fetched and x-locked.
If the page offset equals the free limit of the space, fsp_fill_free_list is
called first so that new extents from above the free limit are added to the
space free list; this makes the descriptor defined, as descriptors are
uninitialized above the free limit. */
UNIV_INLINE
xdes_t*
xdes_get_descriptor_with_space_hdr(
/*===============================*/
				/* out: pointer to the extent descriptor,
				NULL if the page does not exist in the
				space or if offset > free limit */
	fsp_header_t*	sp_header,/* in: space header, x-latched */
	ulint		space,	/* in: space id */
	ulint		offset,	/* in: page offset;
				if equal to the free limit,
				we try to add new extents to
				the space free list */
	mtr_t*		mtr)	/* in: mtr handle */
{
	ulint		free_limit;
	ulint		space_size;
	ulint		xdes_page_no;
	buf_block_t*	xdes_block;
	page_t*		xdes_page;

	ut_ad(mtr);
	ut_ad(mtr_memo_contains(mtr, &fsp_latch, MTR_MEMO_X_LOCK));

	/* Read free limit and space size */
	free_limit = mtr_read_ulint(sp_header + FSP_FREE_LIMIT,
							MLOG_4BYTES, mtr);
	space_size = mtr_read_ulint(sp_header + FSP_SIZE, MLOG_4BYTES, mtr);

	/* A page at or past the end of the space, or above the free limit,
	has no descriptor */
	if ((offset >= space_size) || (offset > free_limit)) {

		return(NULL);
	}

	if (offset == free_limit) {
		/* Exactly at the limit: fill the free list of the space */
		fsp_fill_free_list(space, sp_header, mtr);
	}

	xdes_page_no = xdes_calc_descriptor_page(offset);

	if (xdes_page_no != 0) {
		xdes_block = buf_page_get(space, xdes_page_no, mtr);
		buf_page_x_lock(xdes_block, mtr);

		xdes_page = buf_block_get_frame(xdes_block);
	} else {
		/* The descriptor is on the space header page */
		xdes_page = buf_frame_align(sp_header);
	}

	return(xdes_page + XDES_ARR_OFFSET
		+ XDES_SIZE * xdes_calc_descriptor_index(offset));
}
/************************************************************************
Returns a pointer to the extent descriptor of a page. Page 0 of the space
is fetched and x-locked to obtain the space header, and the lookup is then
delegated to xdes_get_descriptor_with_space_hdr; see that function for the
handling of the free limit. */
static
xdes_t*
xdes_get_descriptor(
/*================*/
			/* out: pointer to the extent descriptor, NULL if the
			page does not exist in the space or if offset > free
			limit */
	ulint	space,	/* in: space id */
	ulint	offset,	/* in: page offset; if equal to the free limit,
			we try to add new extents to the space free list */
	mtr_t*	mtr)	/* in: mtr handle */
{
	buf_block_t*	hdr_block;
	fsp_header_t*	hdr;

	/* The space header is on page 0 of the space */
	hdr_block = buf_page_get(space, 0, mtr);

	hdr = FSP_HEADER_OFFSET + buf_block_get_frame(hdr_block);

	buf_page_x_lock(hdr_block, mtr);

	return(xdes_get_descriptor_with_space_hdr(hdr, space, offset, mtr));
}
/************************************************************************
Returns a pointer to an extent descriptor when the file address of the list
node contained in the descriptor is known. The list node is obtained with
fut_get_ptr_x_lock and the descriptor starts XDES_FLST_NODE bytes before
it. */
UNIV_INLINE
xdes_t*
xdes_lst_get_descriptor(
/*====================*/
				/* out: pointer to the extent descriptor */
	ulint		space,	/* in: space id */
	fil_addr_t	lst_node,/* in: file address of the list node
				contained in the descriptor */
	mtr_t*		mtr)	/* in: mtr handle */
{
	ut_ad(mtr);
	ut_ad(mtr_memo_contains(mtr, &fsp_latch, MTR_MEMO_X_LOCK));

	return(fut_get_ptr_x_lock(space, lst_node, mtr) - XDES_FLST_NODE);
}
/************************************************************************
Returns a pointer to the descriptor which follows the given one in its
descriptor list. The space id is taken from the buffer block of descr and
the next descriptor is fetched with xdes_lst_get_descriptor. */
UNIV_INLINE
xdes_t*
xdes_lst_get_next(
/*==============*/
			/* out: pointer to the next descriptor */
	xdes_t*	descr,	/* in: pointer to a descriptor */
	mtr_t*	mtr)	/* in: mtr handle */
{
	ulint		space_id;
	fil_addr_t	next_addr;

	ut_ad(mtr && descr);

	space_id = buf_page_get_space(buf_block_align(descr));

	next_addr = flst_get_next_addr(descr + XDES_FLST_NODE, mtr);

	return(xdes_lst_get_descriptor(space_id, next_addr, mtr));
}
/************************************************************************
Computes the page offset of the first page of the extent which a descriptor
describes: the offset of the descriptor page plus FSP_EXTENT_SIZE times the
index of the descriptor in the descriptor array of that page. */
UNIV_INLINE
ulint
xdes_get_offset(
/*============*/
			/* out: offset of the first page in extent */
	xdes_t*	descr)	/* in: extent descriptor */
{
	buf_block_t*	descr_block;

	ut_ad(descr);

	descr_block = buf_block_align(descr);

	return(buf_page_get_offset(descr_block)
		+ ((descr - buf_frame_align(descr) - XDES_ARR_OFFSET)
			/ XDES_SIZE)
		  * FSP_EXTENT_SIZE);
}
/**************************************************************************
Fetches page 0 of a space, x-locks it and returns a pointer to the space
header on it. */
UNIV_INLINE
fsp_header_t*
fsp_get_space_header(
/*=================*/
			/* out: pointer to the space header, page x-locked */
	ulint	id,	/* in: space id */
	mtr_t*	mtr)	/* in: mtr */
{
	buf_block_t*	hdr_block;
	fsp_header_t*	hdr;

	ut_ad(mtr);

	hdr_block = buf_page_get(id, 0, mtr);
	buf_page_x_lock(hdr_block, mtr);

	hdr = FSP_HEADER_OFFSET + buf_block_get_frame(hdr_block);

	return(hdr);
}
/**************************************************************************
Initializes the file space system: creates the rw-latch fsp_latch. */

void
fsp_init(void)
/*==========*/
{
	rw_lock_create(&fsp_latch);
}
/**************************************************************************
Initializes the space header of a newly created space. The function
x-locks fsp_latch, stores the size, zeroes the free limit, the lowest
no-write offset and the used fragment page count, initializes the five
list base nodes of the header, and sets the segment id field to
ut_dulint_create(0, 1). */

void
fsp_header_init(
/*============*/
	ulint	space,	/* in: space id */
	ulint	size,	/* in: current size in blocks */
	mtr_t*	mtr)	/* in: mini-transaction handle */
{
	fsp_header_t*	hdr;

	ut_ad(mtr);

	mtr_x_lock(&fsp_latch, mtr);

	hdr = fsp_get_space_header(space, mtr);

	/* Scalar fields */
	mlog_write_ulint(hdr + FSP_SIZE, size, MLOG_4BYTES, mtr);
	mlog_write_ulint(hdr + FSP_FREE_LIMIT, 0, MLOG_4BYTES, mtr);
	mlog_write_ulint(hdr + FSP_LOWEST_NO_WRITE, 0, MLOG_4BYTES, mtr);
	mlog_write_ulint(hdr + FSP_FRAG_N_USED, 0, MLOG_4BYTES, mtr);

	/* Extent lists and segment header page lists */
	flst_init(hdr + FSP_FREE, mtr);
	flst_init(hdr + FSP_FREE_FRAG, mtr);
	flst_init(hdr + FSP_FULL_FRAG, mtr);
	flst_init(hdr + FSP_SEG_HDRS_FULL, mtr);
	flst_init(hdr + FSP_SEG_HDRS_FREE, mtr);

	/* Segment id field */
	mlog_write_dulint(hdr + FSP_SEG_ID, ut_dulint_create(0, 1),
							MLOG_8BYTES, mtr);
}
/**************************************************************************
Adds size_inc to the size field stored in the header of a space. The
function x-locks fsp_latch before touching the header. */

void
fsp_header_inc_size(
/*================*/
	ulint	space,		/* in: space id */
	ulint	size_inc,	/* in: size increment in pages */
	mtr_t*	mtr)		/* in: mini-transaction handle */
{
	fsp_header_t*	hdr;
	ulint		old_size;

	ut_ad(mtr);

	mtr_x_lock(&fsp_latch, mtr);

	hdr = fsp_get_space_header(space, mtr);

	old_size = mtr_read_ulint(hdr + FSP_SIZE, MLOG_4BYTES, mtr);

	mlog_write_ulint(hdr + FSP_SIZE, old_size + size_inc, MLOG_4BYTES,
									mtr);
}
/**************************************************************************
Moves extents from above the free limit to the lists of the space, as long
as whole extents fit below the space size and fewer than FSP_FREE_ADD
extents have been added to the FSP_FREE list. An extent whose first page is
an extent descriptor page is put to the FSP_FREE_FRAG list with that page
marked as used; such an extent is not counted against FSP_FREE_ADD. */
static
void
fsp_fill_free_list(
/*===============*/
	ulint		space,	/* in: space */
	fsp_header_t*	header,	/* in: space header */
	mtr_t*		mtr)	/* in: mtr */
{
	ulint	space_size;
	ulint	free_limit;
	ulint	page_no;
	ulint	n_frag_used;
	ulint	n_added		= 0;
	xdes_t*	xdes;

	ut_ad(header && mtr);

	/* Check if we can fill free list from above the free list limit */
	space_size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr);
	free_limit = mtr_read_ulint(header + FSP_FREE_LIMIT, MLOG_4BYTES,
									mtr);

	for (page_no = free_limit;
	     (page_no + FSP_EXTENT_SIZE <= space_size)
	     && (n_added < FSP_FREE_ADD);
	     page_no += FSP_EXTENT_SIZE) {

		/* The limit is raised before the descriptor is fetched, so
		that page_no lies below the limit in the lookup */
		mlog_write_ulint(header + FSP_FREE_LIMIT,
				page_no + FSP_EXTENT_SIZE, MLOG_4BYTES, mtr);

		xdes = xdes_get_descriptor_with_space_hdr(header, space,
								page_no, mtr);
		xdes_init(xdes, mtr);

		ut_ad(XDES_DESCRIBED_PER_PAGE % FSP_EXTENT_SIZE == 0);

		if (0 != page_no % XDES_DESCRIBED_PER_PAGE) {
			/* An ordinary extent: it goes to the free list */
			flst_add_last(header + FSP_FREE,
						xdes + XDES_FLST_NODE, mtr);
			n_added++;

			continue;
		}

		/* The first page in the extent is a descriptor page:
		mark it used */

		xdes_set_bit(xdes, XDES_FREE_BIT, 0, FALSE, mtr);
		xdes_set_state(xdes, XDES_FREE_FRAG, mtr);

		flst_add_last(header + FSP_FREE_FRAG, xdes + XDES_FLST_NODE,
									mtr);

		n_frag_used = mtr_read_ulint(header + FSP_FRAG_N_USED,
							MLOG_4BYTES, mtr);
		mlog_write_ulint(header + FSP_FRAG_N_USED, n_frag_used + 1,
							MLOG_4BYTES, mtr);
	}
}
/**************************************************************************
Takes an extent off the FSP_FREE list of a space. The extent containing the
hinted page is taken if its state is XDES_FREE; otherwise the first extent
of the list is taken, after trying to fill the list if it is empty. */
static
xdes_t*
fsp_alloc_free_extent(
/*==================*/
			/* out: extent descriptor, NULL if cannot
			be allocated */
	ulint	space,	/* in: space id */
	ulint	hint,	/* in: hint of which extent would be
			desirable: any page offset in the extent
			goes; the hint must not be > FSP_FREE_LIMIT */
	mtr_t*	mtr)	/* in: mtr */
{
	fsp_header_t*	hdr;
	fil_addr_t	first_addr;
	xdes_t*		xdes;

	ut_ad(mtr);

	hdr = fsp_get_space_header(space, mtr);

	xdes = xdes_get_descriptor_with_space_hdr(hdr, space, hint, mtr);

	if ((xdes == NULL) || (xdes_get_state(xdes, mtr) != XDES_FREE)) {
		/* The hinted extent is not available: take the first
		extent in the free list */

		first_addr = flst_get_first(hdr + FSP_FREE, mtr);

		if (fil_addr_is_null(first_addr)) {
			fsp_fill_free_list(space, hdr, mtr);

			first_addr = flst_get_first(hdr + FSP_FREE, mtr);

			if (fil_addr_is_null(first_addr)) {
				/* No free extents left */

				return(NULL);
			}
		}

		xdes = xdes_lst_get_descriptor(space, first_addr, mtr);
	}

	flst_remove(hdr + FSP_FREE, xdes + XDES_FLST_NODE, mtr);

	return(xdes);
}
/**************************************************************************
Allocates a single free page from the fragment extents of a space and marks
it used. The page is taken from the extent of the hinted page if that
extent is in state XDES_FREE_FRAG; otherwise from the first extent of the
FSP_FREE_FRAG list, or from a newly allocated free extent if that list is
empty. */
static
ulint
fsp_alloc_free_page(
/*================*/
			/* out: the page offset, FIL_NULL
			if no page could be allocated */
	ulint	space,	/* in: space id */
	ulint	hint,	/* in: hint of which page would be desirable */
	mtr_t*	mtr)	/* in: mtr handle */
{
	fsp_header_t*	hdr;
	fil_addr_t	first_addr;
	xdes_t*		xdes;
	ulint		free_page;
	ulint		n_frag_used;

	ut_ad(mtr);

	hdr = fsp_get_space_header(space, mtr);

	/* Get the hinted descriptor */
	xdes = xdes_get_descriptor_with_space_hdr(hdr, space, hint, mtr);

	if ((xdes == NULL)
	    || (xdes_get_state(xdes, mtr) != XDES_FREE_FRAG)) {

		/* The hinted extent cannot be used: look at the first
		extent in the free_frag list */
		first_addr = flst_get_first(hdr + FSP_FREE_FRAG, mtr);

		if (!fil_addr_is_null(first_addr)) {
			xdes = xdes_lst_get_descriptor(space, first_addr,
									mtr);
		} else {
			/* There are no partially full fragments: allocate
			a free extent and add it to the FREE_FRAG
			list. NOTE that the allocation may have as a
			side-effect that an extent containing a descriptor
			page is added to the FREE_FRAG list. But we will
			allocate our page from the allocated free extent. */

			xdes = fsp_alloc_free_extent(space, hint, mtr);

			if (xdes == NULL) {
				/* No free space left */

				return(FIL_NULL);
			}

			xdes_set_state(xdes, XDES_FREE_FRAG, mtr);
			flst_add_last(hdr + FSP_FREE_FRAG,
						xdes + XDES_FLST_NODE, mtr);
		}

		/* Reset the hint */
		hint = 0;
	}

	/* Now xdes is an extent with at least one free page.
	Look for a free page in the extent. */

	free_page = xdes_find_bit(xdes, XDES_FREE_BIT, TRUE,
						hint % FSP_EXTENT_SIZE, mtr);
	ut_a(free_page != ULINT_UNDEFINED);

	xdes_set_bit(xdes, XDES_FREE_BIT, free_page, FALSE, mtr);

	/* Update the FRAG_N_USED field */
	n_frag_used = 1 + mtr_read_ulint(hdr + FSP_FRAG_N_USED, MLOG_4BYTES,
									mtr);
	mlog_write_ulint(hdr + FSP_FRAG_N_USED, n_frag_used, MLOG_4BYTES,
									mtr);

	if (xdes_is_full(xdes, mtr)) {
		/* The fragment is full: move it to another list. The
		pages of a full extent are not counted in FRAG_N_USED. */

		flst_remove(hdr + FSP_FREE_FRAG, xdes + XDES_FLST_NODE, mtr);

		xdes_set_state(xdes, XDES_FULL_FRAG, mtr);

		flst_add_last(hdr + FSP_FULL_FRAG, xdes + XDES_FLST_NODE,
									mtr);
		mlog_write_ulint(hdr + FSP_FRAG_N_USED,
					n_frag_used - FSP_EXTENT_SIZE,
					MLOG_4BYTES, mtr);
	}

	return(xdes_get_offset(xdes) + free_page);
}
/**************************************************************************
Frees a single page of a fragment extent of a space. The free bit and the
clean bit of the page are set to TRUE, the used fragment page count is
updated, and the extent is moved to another list if it ceased to be full
or became totally free. */
static
void
fsp_free_page(
/*==========*/
	ulint	space,	/* in: space id */
	ulint	page,	/* in: page offset */
	mtr_t*	mtr)	/* in: mtr handle */
{
	fsp_header_t*	hdr;
	xdes_t*		xdes;
	ulint		xdes_state;
	ulint		n_frag_used;
	ulint		page_in_extent;

	ut_ad(mtr);

	hdr = fsp_get_space_header(space, mtr);

	xdes = xdes_get_descriptor_with_space_hdr(hdr, space, page, mtr);

	xdes_state = xdes_get_state(xdes, mtr);

	ut_a((xdes_state == XDES_FREE_FRAG)
	     || (xdes_state == XDES_FULL_FRAG));

	page_in_extent = page % FSP_EXTENT_SIZE;

	/* The page must currently be marked as used */
	ut_a(xdes_get_bit(xdes, XDES_FREE_BIT, page_in_extent, mtr)
								== FALSE);

	xdes_set_bit(xdes, XDES_FREE_BIT, page_in_extent, TRUE, mtr);
	xdes_set_bit(xdes, XDES_CLEAN_BIT, page_in_extent, TRUE, mtr);

	n_frag_used = mtr_read_ulint(hdr + FSP_FRAG_N_USED, MLOG_4BYTES,
									mtr);

	if (xdes_state != XDES_FULL_FRAG) {
		ut_a(n_frag_used > 0);

		mlog_write_ulint(hdr + FSP_FRAG_N_USED, n_frag_used - 1,
							MLOG_4BYTES, mtr);
	} else {
		/* The fragment was full: move it to another list. Its
		remaining used pages are added to FRAG_N_USED. */

		flst_remove(hdr + FSP_FULL_FRAG, xdes + XDES_FLST_NODE, mtr);

		xdes_set_state(xdes, XDES_FREE_FRAG, mtr);

		flst_add_last(hdr + FSP_FREE_FRAG, xdes + XDES_FLST_NODE,
									mtr);
		mlog_write_ulint(hdr + FSP_FRAG_N_USED,
					n_frag_used + FSP_EXTENT_SIZE - 1,
					MLOG_4BYTES, mtr);
	}

	if (xdes_is_free(xdes, mtr)) {
		/* The extent has become free: move it to another list */

		flst_remove(hdr + FSP_FREE_FRAG, xdes + XDES_FLST_NODE, mtr);

		fsp_free_extent(space, page, mtr);
	}
}
/**************************************************************************
Puts an extent back on the FSP_FREE list of a space. The extent descriptor
is reinitialized with xdes_init first; the extent must not already be in
the XDES_FREE state. */
static
void
fsp_free_extent(
/*============*/
	ulint	space,	/* in: space id */
	ulint	page,	/* in: page offset in the extent */
	mtr_t*	mtr)	/* in: mtr */
{
	fsp_header_t*	space_hdr;
	xdes_t*		xdes;
	ulint		state;

	ut_ad(mtr);

	space_hdr = fsp_get_space_header(space, mtr);
	xdes = xdes_get_descriptor_with_space_hdr(space_hdr, space, page, mtr);

	state = xdes_get_state(xdes, mtr);
	ut_a(state != XDES_FREE);

	xdes_init(xdes, mtr);

	flst_add_last(space_hdr + FSP_FREE, xdes + XDES_FLST_NODE, mtr);
}
/**************************************************************************
Returns a pointer to the i'th segment header slot on a segment header
page. The slot address is computed from FSEG_ARR_OFFSET and
FSEG_HEADER_SIZE; nothing is read from the page. */
UNIV_INLINE
fseg_header_t*
fsp_seg_hdr_page_get_nth_hdr(
/*=========================*/
			/* out: segment header */
	page_t*	page,	/* in: segment header page */
	ulint	i,	/* in: index of the header slot on the page */
	mtr_t*	mtr)	/* in: mini-transaction handle */
{
	fseg_header_t*	nth_hdr;

	ut_ad(i < FSP_SEG_HDRS_PER_PAGE);
	ut_ad(mtr_memo_contains(mtr, page, MTR_MEMO_PAGE_X_LOCK));

	nth_hdr = page + FSEG_ARR_OFFSET + FSEG_HEADER_SIZE * i;

	return(nth_hdr);
}
/**************************************************************************
Scans a segment header page from slot 0 upwards for a header that is in
use, i.e. whose FSEG_ID field is not zero. */
static
ulint
fsp_seg_hdr_page_find_used(
/*=======================*/
			/* out: index of the first used segment header,
			or ULINT_UNDEFINED if every slot is unused */
	page_t*	page,	/* in: segment header page */
	mtr_t*	mtr)	/* in: mini-transaction handle */
{
	fseg_header_t*	hdr;
	ulint		slot;

	slot = 0;

	while (slot < FSP_SEG_HDRS_PER_PAGE) {

		hdr = fsp_seg_hdr_page_get_nth_hdr(page, slot, mtr);

		if (0 != ut_dulint_cmp(mach_read_from_8(hdr + FSEG_ID),
							ut_dulint_zero)) {
			/* Nonzero id: this slot is in use */

			return(slot);
		}

		slot++;
	}

	return(ULINT_UNDEFINED);
}
/**************************************************************************
Scans a segment header page, starting from slot j, for a header that is
unused, i.e. whose FSEG_ID field is zero. */
static
ulint
fsp_seg_hdr_page_find_free(
/*=======================*/
			/* out: index of the first unused segment header
			at or after j, or ULINT_UNDEFINED if there is
			none */
	page_t*	page,	/* in: segment header page */
	ulint	j,	/* in: search forward starting from this index */
	mtr_t*	mtr)	/* in: mini-transaction handle */
{
	fseg_header_t*	hdr;
	ulint		slot;

	slot = j;

	while (slot < FSP_SEG_HDRS_PER_PAGE) {

		hdr = fsp_seg_hdr_page_get_nth_hdr(page, slot, mtr);

		if (0 == ut_dulint_cmp(mach_read_from_8(hdr + FSEG_ID),
							ut_dulint_zero)) {
			/* Zero id: this slot is unused */

			return(slot);
		}

		slot++;
	}

	return(ULINT_UNDEFINED);
}
/**************************************************************************
Allocates a page from the space for use as a segment header page, writes a
zero id into every header slot on it, and appends the page to the
FSP_SEG_HDRS_FREE list of the space header. */
static
bool
fsp_alloc_seg_hdr_page(
/*===================*/
					/* out: TRUE if could be allocated */
	fsp_header_t*	space_header,	/* in: space header */
	mtr_t*		mtr)		/* in: mini-transaction handle */
{
	buf_block_t*	new_block;
	page_t*		new_page;
	fseg_header_t*	hdr;
	ulint		new_page_no;
	ulint		slot;

	new_page_no = fsp_alloc_free_page(buf_frame_get_space(space_header),
								0, mtr);
	if (FIL_NULL == new_page_no) {

		return(FALSE);
	}

	new_block = buf_page_get(buf_frame_get_space(space_header),
							new_page_no, mtr);
	buf_page_x_lock(new_block, mtr);

	new_page = buf_block_get_frame(new_block);

	/* Mark every header slot on the new page as unused */
	slot = 0;

	while (slot < FSP_SEG_HDRS_PER_PAGE) {

		hdr = fsp_seg_hdr_page_get_nth_hdr(new_page, slot, mtr);

		mlog_write_dulint(hdr + FSEG_ID, ut_dulint_zero, MLOG_8BYTES,
									mtr);
		slot++;
	}

	flst_add_last(space_header + FSP_SEG_HDRS_FREE,
					new_page + FSEG_HDR_PAGE_NODE, mtr);
	return(TRUE);
}
/**************************************************************************
Reserves an unused segment header slot. The slot is taken from the first
page on the FSP_SEG_HDRS_FREE list; a new segment header page is allocated
first if that list is empty. If the chosen slot is the last unused one on
its page, the page is moved to the FSP_SEG_HDRS_FULL list. */
static
fseg_header_t*
fsp_alloc_seg_header(
/*=================*/
					/* out: segment header, or NULL if
					not enough space */
	fsp_header_t*	space_header,	/* in: space header */
	mtr_t*		mtr)		/* in: mini-transaction handle */
{
	buf_block_t*	hdr_block;
	page_t*		hdr_page;
	fseg_header_t*	free_hdr;
	fil_addr_t	first;
	ulint		slot;

	if ((0 == flst_get_len(space_header + FSP_SEG_HDRS_FREE, mtr))
	    && !fsp_alloc_seg_hdr_page(space_header, mtr)) {
		/* The free list was empty and no new segment header page
		could be allocated */

		return(NULL);
	}

	first = flst_get_first(space_header + FSP_SEG_HDRS_FREE, mtr);

	hdr_block = buf_page_get(buf_frame_get_space(space_header),
							first.page, mtr);
	buf_page_x_lock(hdr_block, mtr);

	hdr_page = buf_block_get_frame(hdr_block);

	slot = fsp_seg_hdr_page_find_free(hdr_page, 0, mtr);

	ut_a(slot != ULINT_UNDEFINED);

	free_hdr = fsp_seg_hdr_page_get_nth_hdr(hdr_page, slot, mtr);

	if (fsp_seg_hdr_page_find_free(hdr_page, slot + 1, mtr)
	    == ULINT_UNDEFINED) {
		/* No unused slot follows the one being handed out: move
		the page from the free list to the full list */

		flst_remove(space_header + FSP_SEG_HDRS_FREE,
					hdr_page + FSEG_HDR_PAGE_NODE, mtr);
		flst_add_last(space_header + FSP_SEG_HDRS_FULL,
					hdr_page + FSEG_HDR_PAGE_NODE, mtr);
	}

	return(free_hdr);
}
/**************************************************************************
Frees a file segment header slot. The slot's FSEG_ID and FSEG_MAGIC_N
fields are zeroed. If the header page had no unused slot before this call,
the page is moved from the FSP_SEG_HDRS_FULL list to the FSP_SEG_HDRS_FREE
list; if no used slot remains on the page afterwards, the page is removed
from the list and freed to the space. */
static
void
fsp_free_seg_header(
/*================*/
	ulint		space,	/* in: space id */
	fseg_header_t*	header,	/* in: segment header */
	mtr_t*		mtr)	/* in: mini-transaction handle */
{
	page_t*		page;
	fsp_header_t*	space_header;

	page = buf_frame_align(header);

	space_header = fsp_get_space_header(space, mtr);

	ut_ad(mach_read_from_4(header + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);

	/* The search must start from slot 0: fsp_seg_hdr_page_find_free
	takes the start index as its second argument */
	if (ULINT_UNDEFINED == fsp_seg_hdr_page_find_free(page, 0, mtr)) {

		/* The page has no unused slot, so it is on the full list:
		move it to the free list */

		flst_remove(space_header + FSP_SEG_HDRS_FULL,
					page + FSEG_HDR_PAGE_NODE, mtr);
		flst_add_last(space_header + FSP_SEG_HDRS_FREE,
					page + FSEG_HDR_PAGE_NODE, mtr);
	}

	mlog_write_dulint(header + FSEG_ID, ut_dulint_zero, MLOG_8BYTES, mtr);
	mlog_write_ulint(header + FSEG_MAGIC_N, 0, MLOG_4BYTES, mtr);

	if (ULINT_UNDEFINED == fsp_seg_hdr_page_find_used(page, mtr)) {

		/* There are no other used headers left on the page: free it */

		flst_remove(space_header + FSP_SEG_HDRS_FREE,
					page + FSEG_HDR_PAGE_NODE, mtr);

		/* The page number is derived from the frame, in the same
		way as fseg_create does it */
		fsp_free_page(space, buf_frame_get_page(page), mtr);
	}
}
/**************************************************************************
Reads the page number stored in the n'th slot of the fragment page array
of a segment header. */
UNIV_INLINE
ulint
fseg_get_nth_frag_page_no(
/*======================*/
				/* out: page number, FIL_NULL if not in use */
	fseg_header_t*	header,	/* in: segment header */
	ulint		n,	/* in: slot index */
	mtr_t*		mtr)	/* in: mtr handle */
{
	fseg_header_t*	slot;

	ut_ad(header && mtr);
	ut_ad(n < FSEG_FRAG_ARR_N_SLOTS);
	ut_ad(mtr_memo_contains(mtr, buf_block_align(header),
						MTR_MEMO_PAGE_X_LOCK));

	slot = header + FSEG_FRAG_ARR + n * FSEG_FRAG_SLOT_SIZE;

	return(mach_read_from_4(slot));
}
/**************************************************************************
Writes a page number into the n'th slot of the fragment page array of a
segment header, using mlog_write_ulint. */
UNIV_INLINE
void
fseg_set_nth_frag_page_no(
/*======================*/
	fseg_header_t*	header,	/* in: segment header */
	ulint		n,	/* in: slot index */
	ulint		page_no,/* in: page number to set */
	mtr_t*		mtr)	/* in: mtr handle */
{
	fseg_header_t*	slot;

	ut_ad(header && mtr);
	ut_ad(n < FSEG_FRAG_ARR_N_SLOTS);
	ut_ad(mtr_memo_contains(mtr, buf_block_align(header),
						MTR_MEMO_PAGE_X_LOCK));

	slot = header + FSEG_FRAG_ARR + n * FSEG_FRAG_SLOT_SIZE;

	mlog_write_ulint(slot, page_no, MLOG_4BYTES, mtr);
}
/**************************************************************************
Searches the fragment page array of a segment from slot 0 upwards for a
slot that holds FIL_NULL, i.e. is not in use. */
static
ulint
fseg_find_free_frag_page_slot(
/*==========================*/
				/* out: slot index; ULINT_UNDEFINED if none
				found */
	fseg_header_t*	header,	/* in: segment header */
	mtr_t*		mtr)	/* in: mtr handle */
{
	ulint	slot;

	ut_ad(header && mtr);

	slot = 0;

	while (slot < FSEG_FRAG_ARR_N_SLOTS) {

		if (FIL_NULL == fseg_get_nth_frag_page_no(header, slot,
								mtr)) {
			return(slot);
		}

		slot++;
	}

	return(ULINT_UNDEFINED);
}
/**************************************************************************
Finds the used fragment page slot with the highest index. The fragment
page array of the segment is scanned downwards from its last slot, and the
index of the first slot that does not hold FIL_NULL is returned. */
static
ulint
fseg_find_last_used_frag_page_slot(
/*===============================*/
				/* out: slot index; ULINT_UNDEFINED if none
				found */
	fseg_header_t*	header,	/* in: segment header */
	mtr_t*		mtr)	/* in: mtr handle */
{
	ulint	i;
	ulint	page_no;

	ut_ad(header && mtr);

	/* i runs from the array size down to 1 so that the unsigned
	counter never has to go below zero; the slot probed is i - 1 */
	for (i = FSEG_FRAG_ARR_N_SLOTS; i > 0; i--) {

		page_no = fseg_get_nth_frag_page_no(header, i - 1, mtr);

		if (page_no != FIL_NULL) {
			/* Return the index of the slot itself, not the
			number of slots inspected so far */

			return(i - 1);
		}
	}

	return(ULINT_UNDEFINED);
}
/**************************************************************************
Counts the slots in the fragment page array of a segment that hold a page
number other than FIL_NULL. */
static
ulint
fseg_get_n_frag_pages(
/*==================*/
				/* out: number of fragment pages */
	fseg_header_t*	header,	/* in: segment header */
	mtr_t*		mtr)	/* in: mtr handle */
{
	ulint	n_in_use;
	ulint	slot;

	ut_ad(header && mtr);

	n_in_use = 0;
	slot = 0;

	while (slot < FSEG_FRAG_ARR_N_SLOTS) {

		if (fseg_get_nth_frag_page_no(header, slot, mtr)
		    != FIL_NULL) {
			n_in_use++;
		}

		slot++;
	}

	return(n_in_use);
}
/**************************************************************************
Creates a new segment. A segment header slot is reserved, the next segment
id is taken from the space header (which is then incremented), and the
header's lists, counters, magic number and fragment page array are
initialized. */
ulint
fseg_create(
/*========*/
			/* out: the page number where the segment header is
			placed, FIL_NULL if could not create segment because
			lack of space */
	ulint	space,	/* in: space id */
	ulint*	offset,	/* out: byte offset of the segment header on its
			page; not written when FIL_NULL is returned */
	mtr_t*	mtr)	/* in: mtr */
{
	fsp_header_t*	space_hdr;
	fseg_header_t*	seg_hdr;
	dulint		new_id;
	ulint		slot;
	ulint		hdr_page_no;

	ut_ad(mtr);

	mtr_x_lock(&fsp_latch, mtr);

	space_hdr = fsp_get_space_header(space, mtr);

	seg_hdr = fsp_alloc_seg_header(space_hdr, mtr);

	if (NULL == seg_hdr) {

		return(FIL_NULL);
	}

	/* Take the next segment id from the space header and store the
	incremented value back */
	new_id = mtr_read_dulint(space_hdr + FSP_SEG_ID, MLOG_8BYTES, mtr);

	mlog_write_dulint(space_hdr + FSP_SEG_ID, ut_dulint_add(new_id, 1),
							MLOG_8BYTES, mtr);

	/* Initialize the fields of the new segment header */
	mlog_write_dulint(seg_hdr + FSEG_ID, new_id, MLOG_8BYTES, mtr);
	mlog_write_ulint(seg_hdr + FSEG_NOT_FULL_N_USED, 0, MLOG_4BYTES, mtr);

	flst_init(seg_hdr + FSEG_FREE, mtr);
	flst_init(seg_hdr + FSEG_NOT_FULL, mtr);
	flst_init(seg_hdr + FSEG_FULL, mtr);

	mlog_write_ulint(seg_hdr + FSEG_MAGIC_N, FSEG_MAGIC_N_VALUE,
							MLOG_4BYTES, mtr);

	/* Every fragment page slot starts out unused */
	slot = 0;

	while (slot < FSEG_FRAG_ARR_N_SLOTS) {
		fseg_set_nth_frag_page_no(seg_hdr, slot, FIL_NULL, mtr);

		slot++;
	}

	*offset = seg_hdr - buf_frame_align(seg_hdr);

	hdr_page_no = buf_frame_get_page(buf_frame_align(seg_hdr));

	return(hdr_page_no);
}
/**************************************************************************
Calculates the number of pages reserved by a segment, and how many of them
are currently used. Takes the x-lock on fsp_latch and then delegates to
fseg_n_reserved_pages_low. */
ulint
fseg_n_reserved_pages(
/*==================*/
				/* out: number of reserved pages */
	fseg_header_t*	header,	/* in: segment header */
	ulint*		used,	/* out: number of pages used (<= reserved) */
	mtr_t*		mtr)	/* in: mtr handle */
{
	mtr_x_lock(&fsp_latch, mtr);

	return(fseg_n_reserved_pages_low(header, used, mtr));
}
/**************************************************************************
Calculates the number of pages reserved by a segment, and how many of them
are currently used. Used pages are the FSEG_NOT_FULL_N_USED counter plus
the pages of the extents on FSEG_FULL plus the fragment pages; reserved
pages are the fragment pages plus the pages of all extents on the
FSEG_FREE, FSEG_NOT_FULL and FSEG_FULL lists. */
static
ulint
fseg_n_reserved_pages_low(
/*======================*/
				/* out: number of reserved pages */
	fseg_header_t*	header,	/* in: segment header */
	ulint*		used,	/* out: number of pages used (<= reserved) */
	mtr_t*		mtr)	/* in: mtr handle */
{
	ulint	n_not_full_used;
	ulint	n_frag;
	ulint	len_free;
	ulint	len_not_full;
	ulint	len_full;

	ut_ad(header && used && mtr);
	ut_ad(mtr_memo_contains(mtr, buf_block_align(header),
						MTR_MEMO_BUF_FIX));

	buf_page_x_lock(buf_block_align(header), mtr);

	/* Pages in use */
	n_not_full_used = mtr_read_ulint(header + FSEG_NOT_FULL_N_USED,
							MLOG_4BYTES, mtr);
	len_full = flst_get_len(header + FSEG_FULL, mtr);
	n_frag = fseg_get_n_frag_pages(header, mtr);

	*used = n_not_full_used + FSP_EXTENT_SIZE * len_full + n_frag;

	/* Pages reserved */
	n_frag = fseg_get_n_frag_pages(header, mtr);
	len_free = flst_get_len(header + FSEG_FREE, mtr);
	len_not_full = flst_get_len(header + FSEG_NOT_FULL, mtr);
	len_full = flst_get_len(header + FSEG_FULL, mtr);

	return(n_frag + FSP_EXTENT_SIZE * len_free
			+ FSP_EXTENT_SIZE * len_not_full
			+ FSP_EXTENT_SIZE * len_full);
}
/*************************************************************************
Tries to fill the free list of a segment with consecutive free extents.
This happens if the segment is big enough to allow extents in the free
list, the free list is empty, and the extents can be allocated
consecutively from the hint onward. At most FSEG_FREE_LIST_MAX_LEN extents
are added; filling stops at the first extent that is not available. */
static
void
fseg_fill_free_list(
/*================*/
	fseg_header_t*	header,	/* in: segment header */
	ulint		space,	/* in: space id */
	ulint		hint,	/* in: hint which extent would be good as
				the first extent */
	mtr_t*		mtr)	/* in: mtr */
{
	xdes_t*	xdes;
	dulint	id;
	ulint	n_reserved;
	ulint	n_used;
	ulint	n_added;

	ut_ad(header && mtr);

	buf_page_x_lock(buf_block_align(header), mtr);

	n_reserved = fseg_n_reserved_pages_low(header, &n_used, mtr);

	if ((n_reserved < FSEG_FREE_LIST_LIMIT * FSP_EXTENT_SIZE)
	    || (flst_get_len(header + FSEG_FREE, mtr) > 0)) {
		/* Either the segment is too small to allow extents in the
		free list, or the free list is not empty */

		return;
	}

	n_added = 0;

	while (n_added < FSEG_FREE_LIST_MAX_LEN) {

		xdes = xdes_get_descriptor(space, hint, mtr);

		if ((NULL == xdes)
		    || (xdes_get_state(xdes, mtr) != XDES_FREE)) {
			/* We cannot allocate the desired extent: stop */

			return;
		}

		xdes = fsp_alloc_free_extent(space, hint, mtr);

		/* Assign the extent to this segment and put it on the
		segment's free list */
		xdes_set_state(xdes, XDES_FSEG, mtr);

		id = mtr_read_dulint(header + FSEG_ID, MLOG_8BYTES, mtr);
		mlog_write_dulint(xdes + XDES_ID, id, MLOG_8BYTES, mtr);

		flst_add_last(header + FSEG_FREE, xdes + XDES_FLST_NODE, mtr);

		hint += FSP_EXTENT_SIZE;
		n_added++;
	}
}
/*************************************************************************
Allocates a free extent for the segment: looks first in the free list of
the segment, then tries to allocate from the space free list. NOTE that
the extent returned is still placed in the segment free list, not taken
off it! */
static
xdes_t*
fseg_alloc_free_extent(
/*===================*/
				/* out: allocated extent, still placed in the
				segment free list, NULL if could
				not be allocated */
	fseg_header_t*	header,	/* in: segment header */
	ulint		space,	/* in: space id */
	mtr_t*		mtr)	/* in: mtr */
{
	xdes_t*		ext;
	dulint		id;
	fil_addr_t	head;

	buf_page_x_lock(buf_block_align(header), mtr);

	if (flst_get_len(header + FSEG_FREE, mtr) > 0) {
		/* Segment free list is not empty, allocate from it */

		head = flst_get_first(header + FSEG_FREE, mtr);

		return(xdes_lst_get_descriptor(space, head, mtr));
	}

	/* Segment free list was empty, allocate from space */
	ext = fsp_alloc_free_extent(space, 0, mtr);

	if (NULL == ext) {

		return(NULL);
	}

	id = mtr_read_dulint(header + FSEG_ID, MLOG_8BYTES, mtr);

	xdes_set_state(ext, XDES_FSEG, mtr);
	mlog_write_dulint(ext + XDES_ID, id, MLOG_8BYTES, mtr);
	flst_add_last(header + FSEG_FREE, ext + XDES_FLST_NODE, mtr);

	/* Try to fill the segment free list, starting from the extent
	that follows the one just allocated */
	fseg_fill_free_list(header, space,
				xdes_get_offset(ext) + FSP_EXTENT_SIZE, mtr);

	return(ext);
}
/**************************************************************************
Allocates a single free page from a segment. This function implements
the intelligent allocation strategy which tries to minimize file space
fragmentation. The candidates are tried in this order:
1. the hinted page itself, if it is free in an extent of this segment;
2. the hinted page in a free extent that is allocated for the segment;
3. the lowest or highest page of any free extent, depending on direction;
4. another free page in the hinted page's extent;
5. any unused page in an extent of the segment;
6. an individual (fragment) page from the space;
7. the first page of a newly allocated extent. */
ulint
fseg_alloc_free_page(
/*=================*/
				/* out: the allocated page offset
				FIL_NULL if no page could be allocated */
	fseg_header_t*	seg_header, /* in: segment header */
	ulint		hint,	/* in: hint of which page would be desirable */
	byte		direction, /* in: if the new page is needed because
				of an index page split, and records are
				inserted there in order, into which
				direction they go alphabetically: FSP_DOWN,
				FSP_UP, FSP_NO_DIR */
	mtr_t*		mtr)	/* in: mtr handle */
{
	buf_block_t*	block;
	dulint		seg_id;
	ulint		space;
	ulint		used;
	ulint		reserved;
	fil_addr_t	first;
	xdes_t*		descr;		/* extent of the hinted page */
	ulint		ret_page;	/* the allocated page offset, FIL_NULL
					if could not be allocated */
	xdes_t*		ret_descr;	/* the extent of the allocated page */
	ulint		n;
	bool		frag_page_allocated = FALSE;

	ut_ad(seg_header && mtr);
	ut_ad((direction >= FSP_UP) && (direction <= FSP_NO_DIR));

	mtr_x_lock(&fsp_latch, mtr);

	block = buf_block_align(seg_header);
	buf_page_x_lock(block, mtr);

	space = buf_page_get_space(block);

	seg_id = mtr_read_dulint(seg_header + FSEG_ID, MLOG_8BYTES, mtr);

	ut_ad(ut_dulint_cmp(seg_id, ut_dulint_zero) > 0);

	reserved = fseg_n_reserved_pages_low(seg_header, &used, mtr);

	descr = xdes_get_descriptor(space, hint, mtr);

	if (descr == NULL) {
		/* Hint outside space or too high above free limit:
		reset hint */
		hint = 0;
		descr = xdes_get_descriptor(space, hint, mtr);
	}

	/* In the big if-else below we look for ret_page and ret_descr */
	/*-------------------------------------------------------------*/
	if ((xdes_get_state(descr, mtr) == XDES_FSEG)
	    && (0 == ut_dulint_cmp(mtr_read_dulint(descr + XDES_ID,
						MLOG_8BYTES, mtr),
				   seg_id))
	    && (xdes_get_bit(descr, XDES_FREE_BIT, hint % FSP_EXTENT_SIZE,
								mtr)
		== TRUE)) {

		/* 1. We can take the hinted page
		=================================*/
		ret_descr = descr;
		ret_page = hint;
	/*-------------------------------------------------------------*/
	} else if ((xdes_get_state(descr, mtr) == XDES_FREE)
		   && ((reserved - used) < reserved / FSEG_FILLFACTOR)
		   && (used >= FSEG_FRAG_LIMIT)) {

		/* 2. We allocate the free extent from space and can take
		=========================================================
		the hinted page
		===============*/
		ret_descr = fsp_alloc_free_extent(space, hint, mtr);

		ut_a(ret_descr == descr);

		xdes_set_state(ret_descr, XDES_FSEG, mtr);
		mlog_write_dulint(ret_descr + XDES_ID, seg_id, MLOG_8BYTES,
									mtr);
		flst_add_last(seg_header + FSEG_FREE,
					ret_descr + XDES_FLST_NODE, mtr);

		/* Try to fill the segment free list */
		fseg_fill_free_list(seg_header, space,
					hint + FSP_EXTENT_SIZE, mtr);
		ret_page = hint;
	/*-------------------------------------------------------------*/
	} else if ((direction != FSP_NO_DIR)
		   && ((reserved - used) < reserved / FSEG_FILLFACTOR)
		   && (used >= FSEG_FRAG_LIMIT)
		   && (NULL != (ret_descr =
				fseg_alloc_free_extent(seg_header, space,
								mtr)))) {

		/* 3. We take any free extent (which was already assigned above
		===============================================================
		in the if-condition to ret_descr) and take the lowest or
		========================================================
		highest page in it, depending on the direction
		==============================================*/
		ret_page = xdes_get_offset(ret_descr);

		if (direction == FSP_DOWN) {
			ret_page += FSP_EXTENT_SIZE - 1;
		}
	/*-------------------------------------------------------------*/
	} else if ((xdes_get_state(descr, mtr) == XDES_FSEG)
		   && (0 == ut_dulint_cmp(mtr_read_dulint(descr + XDES_ID,
						MLOG_8BYTES, mtr),
					  seg_id))
		   && (!xdes_is_full(descr, mtr))) {

		/* 4. We can take the page from the same extent as the
		======================================================
		hinted page (and the extent already belongs to the
		==================================================
		segment)
		========*/
		ret_descr = descr;
		ret_page = xdes_get_offset(ret_descr)
			   + xdes_find_bit(ret_descr, XDES_FREE_BIT, TRUE,
					hint % FSP_EXTENT_SIZE, mtr);
	/*-------------------------------------------------------------*/
	} else if (reserved - used > 0) {
		/* 5. We take any unused page from the segment
		==============================================*/
		if (flst_get_len(seg_header + FSEG_NOT_FULL, mtr) > 0) {
			first = flst_get_first(seg_header + FSEG_NOT_FULL,
									mtr);
		} else if (flst_get_len(seg_header + FSEG_FREE, mtr) > 0) {
			first = flst_get_first(seg_header + FSEG_FREE, mtr);
		} else {
			ut_error;
		}

		ret_descr = xdes_lst_get_descriptor(space, first, mtr);
		ret_page = xdes_get_offset(ret_descr)
			   + xdes_find_bit(ret_descr, XDES_FREE_BIT, TRUE,
								0, mtr);
	/*-------------------------------------------------------------*/
	} else if (used < FSEG_FRAG_LIMIT) {
		/* 6. We allocate an individual page from the space
		===================================================*/
		ret_page = fsp_alloc_free_page(space, hint, mtr);
		ret_descr = NULL;

		frag_page_allocated = TRUE;

		if (ret_page != FIL_NULL) {
			/* Put the page in the fragment page array of the
			segment */
			n = fseg_find_free_frag_page_slot(seg_header, mtr);

			/* fseg_find_free_frag_page_slot reports failure
			with ULINT_UNDEFINED, not with FIL_NULL */
			ut_a(n != ULINT_UNDEFINED);

			fseg_set_nth_frag_page_no(seg_header, n, ret_page,
									mtr);
		}
	/*-------------------------------------------------------------*/
	} else {
		/* 7. We allocate a new extent and take its first page
		======================================================*/
		ret_descr = fseg_alloc_free_extent(seg_header, space, mtr);

		if (ret_descr == NULL) {
			ret_page = FIL_NULL;
		} else {
			ret_page = xdes_get_offset(ret_descr);
		}
	}

	if (ret_page == FIL_NULL) {
		/* Page could not be allocated */

		return(FIL_NULL);
	}

	/* Initialize the allocated page to buffer pool, so that it can be
	obtained immediately with buf_page_get without need for disk read */

	buf_page_create(space, ret_page, mtr);

	if (!frag_page_allocated) {
		/* At this point we know the extent and the page offset.
		The extent is still in the appropriate list (FSEG_NOT_FULL or
		FSEG_FREE), and the page is not yet marked as used. */

		ut_ad(xdes_get_descriptor(space, ret_page, mtr) == ret_descr);
		ut_ad(xdes_get_bit(ret_descr, XDES_FREE_BIT,
				ret_page % FSP_EXTENT_SIZE, mtr) == TRUE);

		fseg_mark_page_used(seg_header, space, ret_page, mtr);
	}

	return(ret_page);
}
/************************************************************************
Marks a page used. The page must reside within the extents of the given
segment. The extent is moved from FSEG_FREE to FSEG_NOT_FULL when its first
page is taken, and from FSEG_NOT_FULL to FSEG_FULL when its last page is
taken; the FSEG_NOT_FULL_N_USED counter of the segment is kept up to
date. */
static
void
fseg_mark_page_used(
/*================*/
	fseg_header_t*	seg_header,/* in: segment header */
	ulint		space,	/* in: space id */
	ulint		page,	/* in: page offset */
	mtr_t*		mtr)	/* in: mtr */
{
	xdes_t*	descr;
	ulint	not_full_n_used;

	ut_ad(seg_header && mtr);

	descr = xdes_get_descriptor(space, page, mtr);

	/* The ids are 8 bytes wide, so compare them in full, in the same
	way as fseg_free_page_low does */
	ut_ad(0 == ut_dulint_cmp(
			mtr_read_dulint(seg_header + FSEG_ID, MLOG_8BYTES,
									mtr),
			mtr_read_dulint(descr + XDES_ID, MLOG_8BYTES, mtr)));

	if (xdes_is_free(descr, mtr)) {
		/* We move the extent from the free list to the
		NOT_FULL list */
		flst_remove(seg_header + FSEG_FREE, descr + XDES_FLST_NODE,
									mtr);
		flst_add_last(seg_header + FSEG_NOT_FULL,
						descr + XDES_FLST_NODE, mtr);
	}

	ut_ad(xdes_get_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, mtr)
								== TRUE);

	/* We mark the page as used */
	xdes_set_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, FALSE, mtr);

	not_full_n_used = mtr_read_ulint(seg_header + FSEG_NOT_FULL_N_USED,
							MLOG_4BYTES, mtr);
	not_full_n_used++;

	mlog_write_ulint(seg_header + FSEG_NOT_FULL_N_USED, not_full_n_used,
							MLOG_4BYTES, mtr);

	if (xdes_is_full(descr, mtr)) {
		/* We move the extent from the NOT_FULL list to the
		FULL list */
		flst_remove(seg_header + FSEG_NOT_FULL,
						descr + XDES_FLST_NODE, mtr);
		flst_add_last(seg_header + FSEG_FULL,
						descr + XDES_FLST_NODE, mtr);

		/* The pages of this extent no longer count towards the
		NOT_FULL usage counter */
		mlog_write_ulint(seg_header + FSEG_NOT_FULL_N_USED,
					not_full_n_used - FSP_EXTENT_SIZE,
					MLOG_4BYTES, mtr);
	}
}
/**************************************************************************
Frees a single page of a segment. Takes the x-lock on fsp_latch and then
delegates the work to fseg_free_page_low. */
void
fseg_free_page(
/*===========*/
	fseg_header_t*	seg_header,	/* in: segment header */
	ulint		space,		/* in: space id */
	ulint		page,		/* in: page offset */
	mtr_t*		mtr)		/* in: mtr handle */
{
	mtr_x_lock(&fsp_latch, mtr);

	fseg_free_page_low(seg_header, space, page, mtr);
}
/**************************************************************************
Frees a single page of a segment. If the extent of the page is not in the
XDES_FSEG state, the page is one of the segment's fragment pages: its slot
in the fragment page array is cleared and the page is freed to the space.
Otherwise the page is marked free in its extent, the extent is moved
between the segment lists as needed, and the extent is returned to the
space when it becomes completely free. */
static
void
fseg_free_page_low(
/*===============*/
	fseg_header_t*	seg_header,	/* in: segment header */
	ulint		space,		/* in: space id */
	ulint		page,		/* in: page offset */
	mtr_t*		mtr)		/* in: mtr handle */
{
	buf_block_t*	block;
	xdes_t*		descr;
	ulint		not_full_n_used;
	ulint		state;
	ulint		i;

	ut_ad(seg_header && mtr);

	block = buf_block_align(seg_header);
	buf_page_x_lock(block, mtr);

	descr = xdes_get_descriptor(space, page, mtr);

	ut_a(descr);
	ut_a(xdes_get_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, mtr)
								== FALSE);

	state = xdes_get_state(descr, mtr);

	if (state != XDES_FSEG) {
		/* The page is in the fragment pages of the segment */

		for (i = 0;; i++) {
			/* The page must be recorded in one of the slots;
			stop instead of reading past the end of the
			fragment page array */
			ut_a(i < FSEG_FRAG_ARR_N_SLOTS);

			if (fseg_get_nth_frag_page_no(seg_header, i, mtr)
			    == page) {

				fseg_set_nth_frag_page_no(seg_header, i,
							FIL_NULL, mtr);
				break;
			}
		}

		fsp_free_page(space, page, mtr);

		return;
	}

	/* If we get here, the page is in some extent of the segment */

	ut_a(0 == ut_dulint_cmp(
			mtr_read_dulint(descr + XDES_ID, MLOG_8BYTES, mtr),
			mtr_read_dulint(seg_header + FSEG_ID, MLOG_8BYTES,
									mtr)));

	not_full_n_used = mtr_read_ulint(seg_header + FSEG_NOT_FULL_N_USED,
							MLOG_4BYTES, mtr);

	/* The fullness test must be made before the free bit of the page
	is set below */
	if (xdes_is_full(descr, mtr)) {
		/* The fragment is full: move it to another list */
		flst_remove(seg_header + FSEG_FULL,
						descr + XDES_FLST_NODE, mtr);
		flst_add_last(seg_header + FSEG_NOT_FULL,
						descr + XDES_FLST_NODE, mtr);
		mlog_write_ulint(seg_header + FSEG_NOT_FULL_N_USED,
				not_full_n_used + FSP_EXTENT_SIZE - 1,
				MLOG_4BYTES, mtr);
	} else {
		ut_a(not_full_n_used > 0);

		mlog_write_ulint(seg_header + FSEG_NOT_FULL_N_USED,
				not_full_n_used - 1, MLOG_4BYTES, mtr);
	}

	xdes_set_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, TRUE, mtr);
	xdes_set_bit(descr, XDES_CLEAN_BIT, page % FSP_EXTENT_SIZE, TRUE, mtr);

	if (xdes_is_free(descr, mtr)) {
		/* The extent has become free: free it to space */
		flst_remove(seg_header + FSEG_NOT_FULL,
						descr + XDES_FLST_NODE, mtr);
		fsp_free_extent(space, page, mtr);
	}
}
/**************************************************************************
Frees an extent of a segment to the space free list. The extent is first
unlinked from whichever segment list it is on (FSEG_FULL, FSEG_FREE or
FSEG_NOT_FULL); in the NOT_FULL case the used pages of the extent are
subtracted from the FSEG_NOT_FULL_N_USED counter. */
static
void
fseg_free_extent(
/*=============*/
	fseg_header_t*	seg_header,	/* in: segment header */
	ulint		space,		/* in: space id */
	ulint		page,		/* in: page offset in the extent */
	mtr_t*		mtr)		/* in: mtr handle */
{
	buf_block_t*	hdr_block;
	xdes_t*		xdes;
	ulint		n_used_total;
	ulint		n_used_here;

	ut_ad(seg_header && mtr);

	hdr_block = buf_block_align(seg_header);
	buf_page_x_lock(hdr_block, mtr);

	xdes = xdes_get_descriptor(space, page, mtr);

	/* The extent must belong to this very segment */
	ut_a(XDES_FSEG == xdes_get_state(xdes, mtr));
	ut_a(0 == ut_dulint_cmp(
			mtr_read_dulint(xdes + XDES_ID, MLOG_8BYTES, mtr),
			mtr_read_dulint(seg_header + FSEG_ID, MLOG_8BYTES,
									mtr)));

	if (xdes_is_full(xdes, mtr)) {

		flst_remove(seg_header + FSEG_FULL, xdes + XDES_FLST_NODE,
									mtr);
	} else if (xdes_is_free(xdes, mtr)) {

		flst_remove(seg_header + FSEG_FREE, xdes + XDES_FLST_NODE,
									mtr);
	} else {
		flst_remove(seg_header + FSEG_NOT_FULL,
						xdes + XDES_FLST_NODE, mtr);

		n_used_total = mtr_read_ulint(
					seg_header + FSEG_NOT_FULL_N_USED,
					MLOG_4BYTES, mtr);
		n_used_here = xdes_get_n_used(xdes, mtr);

		ut_a(n_used_total >= n_used_here);

		mlog_write_ulint(seg_header + FSEG_NOT_FULL_N_USED,
					n_used_total - n_used_here,
					MLOG_4BYTES, mtr);
	}

	fsp_free_extent(space, page, mtr);
}
/**************************************************************************
Frees part of a segment. This function can be used to free a segment
by repeatedly calling this function in different mini-transactions.
Doing the freeing in a single mini-transaction might result in too big
a mini-transaction. Each call frees one extent of the segment, or, when no
extents are left, one fragment page, or, when no fragment pages are left
either, the segment header itself. */
bool
fseg_free_step(
/*===========*/
			/* out: TRUE if freeing completed */
	ulint	space,	/* in: segment space id */
	ulint	page_no,/* in: segment header page number */
	ulint	offset,	/* in: segment header byte offset on page */
	mtr_t*	mtr)	/* in: mtr */
{
	ulint		n;
	ulint		page;
	xdes_t*		descr;
	fseg_header_t*	header;
	fil_addr_t	header_addr;

	header_addr.page = page_no;
	header_addr.boffset = offset;

	mtr_x_lock(&fsp_latch, mtr);

	header = fut_get_ptr_x_lock(space, header_addr, mtr);

	descr = fseg_get_first_extent(header, mtr);

	if (descr != NULL) {
		/* Free the extent held by the segment */
		page = xdes_get_offset(descr);

		fseg_free_extent(header, space, page, mtr);

		return(FALSE);
	}

	/* Free a frag page. The lookup routine defined in this file is
	fseg_find_last_used_frag_page_slot */

	n = fseg_find_last_used_frag_page_slot(header, mtr);

	if (n == ULINT_UNDEFINED) {
		/* Freeing completed: free the segment header */
		fsp_free_seg_header(space, header, mtr);

		return(TRUE);
	}

	fseg_free_page_low(header, space,
			fseg_get_nth_frag_page_no(header, n, mtr), mtr);

	return(FALSE);
}
/***********************************************************************
Frees a segment. The freeing is performed in several mini-transactions,
so that there is no danger of bufferfixing too many buffer pages: each
round starts a mini-transaction, runs one fseg_free_step and commits. */
void
fseg_free(
/*======*/
	ulint	space,	/* in: space id */
	ulint	page_no,/* in: page number where the segment header is
			placed */
	ulint	offset)	/* in: byte offset of the segment header on that
			page */
{
	mtr_t		mtr;
	buf_block_t*	block;
	bool		done;

	do {
		mtr_start(&mtr);

		block = buf_page_get(space, page_no, &mtr);

		done = fseg_free_step(space, page_no, offset, &mtr);

		mtr_commit(&mtr);
	} while (!done);
}
/**************************************************************************
Returns the first extent descriptor for a segment. We think of the extent
lists of the segment catenated in the order FSEG_FULL -> FSEG_NOT_FULL
-> FSEG_FREE, so the first element of the first non-empty list is
returned. */
static
xdes_t*
fseg_get_first_extent(
/*==================*/
				/* out: the first extent descriptor, or NULL if
				none */
	fseg_header_t*	header,	/* in: segment header */
	mtr_t*		mtr)	/* in: mtr */
{
	buf_block_t*	hdr_block;
	fil_addr_t	head;
	ulint		space_id;

	ut_ad(header && mtr);

	hdr_block = buf_block_align(header);
	buf_page_x_lock(hdr_block, mtr);

	space_id = buf_page_get_space(hdr_block);

	head = fil_addr_null;

	if (0 < flst_get_len(header + FSEG_FULL, mtr)) {

		head = flst_get_first(header + FSEG_FULL, mtr);

	} else if (0 < flst_get_len(header + FSEG_NOT_FULL, mtr)) {

		head = flst_get_first(header + FSEG_NOT_FULL, mtr);

	} else if (0 < flst_get_len(header + FSEG_FREE, mtr)) {

		head = flst_get_first(header + FSEG_FREE, mtr);
	}

	if (FIL_NULL == head.page) {
		/* No extent address was found */

		return(NULL);
	}

	return(xdes_lst_get_descriptor(space_id, head, mtr));
}
#ifdef notdefined
/**************************************************************************
Returns the last extent descriptor of a segment that is not in the FSEG_FREE
list. The extent lists of the segment are thought of as catenated in the
order FSEG_FULL -> FSEG_NOT_FULL -> FSEG_FREE, so FSEG_NOT_FULL is looked at
before FSEG_FULL. */
static
xdes_t*
fseg_get_last_non_free_extent(
/*==========================*/
				/* out: the last extent descriptor, or NULL if
				none */
	fseg_header_t*	header,	/* in: segment header */
	mtr_t*		mtr)	/* in: mtr */
{
	buf_block_t*	header_block;
	fil_addr_t	last_addr;
	ulint		space_id;

	ut_ad(header && mtr);

	header_block = buf_block_align(header);
	buf_page_x_lock(header_block, mtr);
	space_id = buf_page_get_space(header_block);

	last_addr = fil_addr_null;

	if (flst_get_len(header + FSEG_NOT_FULL, mtr) > 0) {

		last_addr = flst_get_last(header + FSEG_NOT_FULL, mtr);

	} else if (flst_get_len(header + FSEG_FULL, mtr) > 0) {

		last_addr = flst_get_last(header + FSEG_FULL, mtr);
	}

	if (last_addr.page != FIL_NULL) {

		return(xdes_lst_get_descriptor(space_id, last_addr, mtr));
	}

	return(NULL);
}
/**************************************************************************
Returns the extent descriptor following the given one in a segment. The
extent lists of the segment are thought of as catenated in the order
FSEG_FULL -> FSEG_NOT_FULL -> FSEG_FREE: when descr is the last node of its
own list, the search continues from the first node of the next non-empty
list in that order. */
static
xdes_t*
fseg_get_next_extent(
/*=================*/
				/* out: next extent descriptor, or NULL if
				none */
	fseg_header_t*	header,	/* in: segment header */
	xdes_t*		descr,	/* in: previous extent descriptor */
	mtr_t*		mtr)	/* in: mtr */
{
	buf_block_t*	header_block;
	fil_addr_t	succ_addr;
	ulint		space_id;

	ut_ad(header && descr && mtr);

	header_block = buf_block_align(header);
	buf_page_x_lock(header_block, mtr);
	space_id = buf_page_get_space(header_block);

	succ_addr = flst_get_next_addr(descr + XDES_FLST_NODE, mtr);

	if (succ_addr.page == FIL_NULL) {
		/* descr is the last extent in its list: move over to the
		lists which come after it in the catenation order */

		if (xdes_is_full(descr, mtr)) {
			/* descr is in FSEG_FULL list */

			if (flst_get_len(header + FSEG_NOT_FULL, mtr) > 0) {

				succ_addr = flst_get_first(
						header + FSEG_NOT_FULL, mtr);

			} else if (flst_get_len(header + FSEG_FREE, mtr)
								> 0) {
				succ_addr = flst_get_first(
						header + FSEG_FREE, mtr);
			}
		} else if (!(xdes_is_full(descr, mtr)
					|| xdes_is_free(descr, mtr))) {
			/* descr is in FSEG_NOT_FULL list */

			if (flst_get_len(header + FSEG_FREE, mtr) > 0) {

				succ_addr = flst_get_first(
						header + FSEG_FREE, mtr);
			}
		}
	}

	if (succ_addr.page == FIL_NULL) {

		return(NULL);
	}

	descr = xdes_lst_get_descriptor(space_id, succ_addr, mtr);
	ut_ad(descr);

	return(descr);
}
/**************************************************************************
Returns the extent descriptor preceding the given one in a segment. The
extent lists of the segment are thought of as catenated in the order
FSEG_FULL -> FSEG_NOT_FULL -> FSEG_FREE: when descr is the first node of its
own list, the search continues from the last node of the nearest non-empty
list which comes before it in that order. */
static
xdes_t*
fseg_get_prev_extent(
/*=================*/
				/* out: previous extent descriptor, or NULL if
				none */
	fseg_header_t*	header,	/* in: segment header */
	xdes_t*		descr,	/* in: extent descriptor */
	mtr_t*		mtr)	/* in: mtr */
{
	buf_block_t*	header_block;
	fil_addr_t	pred_addr;
	ulint		space_id;

	ut_ad(header && descr && mtr);

	header_block = buf_block_align(header);
	buf_page_x_lock(header_block, mtr);
	space_id = buf_page_get_space(header_block);

	pred_addr = flst_get_prev_addr(descr + XDES_FLST_NODE, mtr);

	if (pred_addr.page == FIL_NULL) {
		/* descr is the first extent in its list: move over to the
		lists which come before it in the catenation order */

		if (xdes_is_free(descr, mtr)) {
			/* descr is in FSEG_FREE list */

			if (flst_get_len(header + FSEG_NOT_FULL, mtr) > 0) {

				pred_addr = flst_get_last(
						header + FSEG_NOT_FULL, mtr);

			} else if (flst_get_len(header + FSEG_FULL, mtr)
								> 0) {
				pred_addr = flst_get_last(
						header + FSEG_FULL, mtr);
			}
		} else if (!(xdes_is_full(descr, mtr)
					|| xdes_is_free(descr, mtr))) {
			/* descr is in FSEG_NOT_FULL list */

			if (flst_get_len(header + FSEG_FULL, mtr) > 0) {

				pred_addr = flst_get_last(
						header + FSEG_FULL, mtr);
			}
		}
	}

	if (pred_addr.page == FIL_NULL) {

		return(NULL);
	}

	descr = xdes_lst_get_descriptor(space_id, pred_addr, mtr);
	ut_ad(descr);

	return(descr);
}
/*************************************************************************
Gets the first used page number in the given extent assigned to a specific
segment. Successor extents, in the order described at fseg_get_next_extent,
are never visited: an extent without a used page is in the FSEG_FREE list,
and then no used page can be found in its successors either. */
static
ulint
fseg_extent_get_next_page_no(
/*=========================*/
				/* out: first used page number in the given
				extent, FIL_NULL if no page found */
	fseg_header_t*	header,	/* in: segment header; not used here */
	xdes_t*		descr,	/* in: extent descriptor, if this is NULL, the
				function returns FIL_NULL */
	mtr_t*		mtr)	/* in: mtr */
{
	ulint	first_used;

	UT_NOT_USED(header);

	ut_ad((descr == NULL)
		|| (xdes_get_state(descr, mtr) == XDES_FSEG));

	if (descr == NULL) {

		return(FIL_NULL);
	}

	/* Look for a bit whose XDES_FREE_BIT value is FALSE, with 0 as the
	position hint */
	first_used = xdes_find_bit(descr, XDES_FREE_BIT, FALSE, 0, mtr);

	if (first_used != ULINT_UNDEFINED) {

		return(xdes_get_offset(descr) + first_used);
	}

	/* No page found in this extent: the extent is in FSEG_FREE list,
	thus, no used page can be found in successors */

	return(FIL_NULL);
}
/*************************************************************************
Gets the last used page number in the given extent assigned to a specific
segment, or in its predecessor extents as returned by fseg_get_prev_extent.
If no page is found in the extents, the page number of the last node of the
fragment list is returned; that is FIL_NULL if the list address read from
there has a FIL_NULL page. */
static
ulint
fseg_extent_get_prev_page_no(
/*=========================*/
				/* out: previous used page number in the given
				extent or a predecessor, FIL_NULL
				if no page found */
	fseg_header_t*	header,	/* in: segment header */
	xdes_t*		descr,	/* in: extent descriptor, if this is NULL, the
				function returns the last page of the fragment
				list, if any */
	mtr_t*		mtr)	/* in: mtr */
{
	fil_addr_t	last_frag_addr;
	ulint		result;
	ulint		last_used;

	ut_ad((descr == NULL)
		|| (xdes_get_state(descr, mtr) == XDES_FSEG));

	result = FIL_NULL;

	/* Walk the extents backwards until one has a used page */
	while (descr != NULL) {
		last_used = xdes_find_bit_downward(descr, XDES_FREE_BIT,
					FALSE, FSP_EXTENT_SIZE - 1, mtr);

		if (last_used != ULINT_UNDEFINED) {
			result = xdes_get_offset(descr) + last_used;

			break;
		}

		descr = fseg_get_prev_extent(header, descr, mtr);
	}

	if (result == FIL_NULL) {
		/* Nothing in the extents: fall back to the fragment list */
		last_frag_addr = flst_get_last(header + FSEG_FRAG, mtr);

		result = last_frag_addr.page;
	}

	return(result);
}
/**************************************************************************
Returns the page number of the first segment page: the first page of the
fragment list, or, when that is FIL_NULL, the first used page of the first
extent. If no pages have been freed from the segment, and the pages were
allocated with the hint page number always one greater than previous page,
then it is guaranteed that this function returns the first allocated page. */

ulint
fseg_get_first_page_no(
/*===================*/
				/* out: page number, FIL_NULL if no
				page found */
	fseg_header_t*	header,	/* in: segment header */
	mtr_t*		mtr)	/* in: mtr */
{
	fil_addr_t	first_frag_addr;
	xdes_t*		first_descr;

	ut_ad(header);

	mtr_x_lock(&fsp_latch, mtr);

	buf_page_x_lock(buf_block_align(header), mtr);

	/* The fragment list comes first */
	first_frag_addr = flst_get_first(header + FSEG_FRAG, mtr);

	if (first_frag_addr.page != FIL_NULL) {

		return(first_frag_addr.page);
	}

	/* No fragment page: look into the extents */
	first_descr = fseg_get_first_extent(header, mtr);

	return(fseg_extent_get_next_page_no(header, first_descr, mtr));
}
/**************************************************************************
Returns the page number of the last segment page, searched backwards from
the last non-free extent of the segment. If no pages have been freed from
the segment, and the pages were allocated with the hint page number always
one greater than previous page, then it is guaranteed that this function
returns the last allocated page. */

ulint
fseg_get_last_page_no(
/*==================*/
				/* out: page number, FIL_NULL if no
				page found */
	fseg_header_t*	header,	/* in: segment header */
	mtr_t*		mtr)	/* in: mtr */
{
	xdes_t*	last_descr;

	ut_ad(header);

	mtr_x_lock(&fsp_latch, mtr);

	buf_page_x_lock(buf_block_align(header), mtr);

	last_descr = fseg_get_last_non_free_extent(header, mtr);

	return(fseg_extent_get_prev_page_no(header, last_descr, mtr));
}
/**************************************************************************
Returns the page number of the next segment page. If no pages have been
freed from the segment, and the pages were allocated with the hint page
number always one greater than previous page, then it is guaranteed that
this function steps the pages through in the order they were allocated
to the segment. */

ulint
fseg_get_next_page_no(
/*==================*/
				/* out: page number, FIL_NULL if no
				page left */
	fseg_header_t*	header,	/* in: segment header */
	ulint		page_no,/* in: previous page number */
	mtr_t*		mtr)	/* in: mtr */
{
	fseg_page_header_t*	page_header;
	fil_addr_t		next_frag_addr;
	buf_block_t*		block;
	buf_frame_t*		frame;
	xdes_t*			descr;
	ulint			space;
	ulint			pos;
	ulint			bit;

	ut_ad(header);

	mtr_x_lock(&fsp_latch, mtr);

	block = buf_block_align(header);
	buf_page_x_lock(block, mtr);
	space = buf_page_get_space(block);

	descr = xdes_get_descriptor(space, page_no, mtr);

	/* Position of the current page within its extent */
	pos = page_no % FSP_EXTENT_SIZE;

	ut_ad(xdes_get_bit(descr, XDES_FREE_BIT, pos, mtr) == FALSE);

	if (xdes_get_state(descr, mtr) != XDES_FSEG) {
		/* Current page is a fragment page */

		block = buf_page_get(space, page_no, mtr);
		buf_page_x_lock(block, mtr);

		frame = buf_block_get_frame(block);
		page_header = frame + FSEG_PAGE_HEADER_OFFSET;

		next_frag_addr = flst_get_next_addr(
				page_header + FSEG_PAGE_FRAG_NODE, mtr);

		if (next_frag_addr.page != FIL_NULL) {

			return(next_frag_addr.page);
		}

		/* The fragment list ended: continue from the extents */
		descr = fseg_get_first_extent(header, mtr);

		return(fseg_extent_get_next_page_no(header, descr, mtr));
	}

	/* The extent of the current page belongs to the segment */

	bit = xdes_find_bit(descr, XDES_FREE_BIT, FALSE,
				(page_no + 1) % FSP_EXTENT_SIZE, mtr);

	if ((bit != ULINT_UNDEFINED) && (bit > pos)) {

		return(xdes_get_offset(descr) + bit);
	}

	/* No higher address pages in this extent */
	descr = fseg_get_next_extent(header, descr, mtr);

	return(fseg_extent_get_next_page_no(header, descr, mtr));
}
/**************************************************************************
Returns the page number of the previous segment page. If no pages have been
freed from the segment, and the pages were allocated with the hint page
number always one greater than the previous page, then it is guaranteed that
this function steps through the pages in the order opposite to the allocation
order of the pages. */

ulint
fseg_get_prev_page_no(
/*==================*/
				/* out: page number, FIL_NULL if no page
				left */
	fseg_header_t*	header,	/* in: segment header */
	ulint		page_no,/* in: page number */
	mtr_t*		mtr)	/* in: mtr */
{
	fseg_page_header_t*	page_header;
	fil_addr_t		prev_frag_addr;
	buf_block_t*		block;
	buf_frame_t*		frame;
	xdes_t*			descr;
	ulint			space;
	ulint			pos;
	ulint			bit;

	ut_ad(header);

	mtr_x_lock(&fsp_latch, mtr);

	block = buf_block_align(header);
	buf_page_x_lock(block, mtr);
	space = buf_page_get_space(block);

	descr = xdes_get_descriptor(space, page_no, mtr);

	/* Position of the current page within its extent */
	pos = page_no % FSP_EXTENT_SIZE;

	ut_ad(xdes_get_bit(descr, XDES_FREE_BIT, pos, mtr) == FALSE);

	if (xdes_get_state(descr, mtr) != XDES_FSEG) {
		/* Current page is a fragment page */

		block = buf_page_get(space, page_no, mtr);
		buf_page_x_lock(block, mtr);

		frame = buf_block_get_frame(block);
		page_header = frame + FSEG_PAGE_HEADER_OFFSET;

		prev_frag_addr = flst_get_prev_addr(
				page_header + FSEG_PAGE_FRAG_NODE, mtr);

		return(prev_frag_addr.page);
	}

	/* The extent of the current page belongs to the segment */

	bit = xdes_find_bit_downward(descr, XDES_FREE_BIT, FALSE,
				(page_no - 1) % FSP_EXTENT_SIZE, mtr);

	if ((bit != ULINT_UNDEFINED) && (bit < pos)) {

		return(xdes_get_offset(descr) + bit);
	}

	/* No lower address pages in this extent */
	descr = fseg_get_prev_extent(header, descr, mtr);

	return(fseg_extent_get_prev_page_no(header, descr, mtr));
}
#endif
/***********************************************************************
Validates a segment: runs flst_validate on its four lists and then asserts,
node by node, that every extent in the FSEG_FREE, FSEG_NOT_FULL and
FSEG_FULL lists has the used page count expected for its list, is in state
XDES_FSEG and carries the id of this segment, and that every fragment page
carries the segment id too. Each list node is checked in a mini-transaction
of its own. Failures are reported through ut_a. */
static
bool
fseg_validate_low(
/*==============*/
				/* out: TRUE if ok */
	fseg_header_t*	header,	/* in: segment header */
	mtr_t*		mtr2)	/* in: mtr */
{
	fseg_page_header_t*	page_header;
	flst_node_t*		node;
	buf_frame_t*		frame;
	fil_addr_t		node_addr;
	xdes_t*			descr;
	dulint			seg_id;
	mtr_t			mtr;
	ulint			space;
	ulint			n_used;
	ulint			n_used2	= 0;

	ut_ad(mtr_memo_contains(mtr2, buf_block_align(header),
							MTR_MEMO_BUF_FIX));

	buf_page_x_lock(buf_block_align(header), mtr2);

	space = buf_page_get_space(buf_block_align(header));

	seg_id = mtr_read_dulint(header + FSEG_ID, MLOG_8BYTES, mtr2);

	/* The counter stored in the header; compared at the end with the
	sum of used pages over the FSEG_NOT_FULL extents */
	n_used = mtr_read_ulint(header + FSEG_NOT_FULL_N_USED, MLOG_4BYTES,
									mtr2);

	flst_validate(header + FSEG_FRAG, mtr2);
	flst_validate(header + FSEG_FREE, mtr2);
	flst_validate(header + FSEG_NOT_FULL, mtr2);
	flst_validate(header + FSEG_FULL, mtr2);

	/* Validate FSEG_FREE list */

	for (node_addr = flst_get_first(header + FSEG_FREE, mtr2);
	     !fil_addr_is_null(node_addr);
	     ) {
		mtr_start(&mtr);
		mtr_x_lock(&fsp_latch, &mtr);

		descr = xdes_lst_get_descriptor(space, node_addr, &mtr);

		ut_a(xdes_get_n_used(descr, &mtr) == 0);
		ut_a(xdes_get_state(descr, &mtr) == XDES_FSEG);
		ut_a(0 == ut_dulint_cmp(
				mtr_read_dulint(descr + XDES_ID, MLOG_8BYTES,
								&mtr),
				seg_id));

		node_addr = flst_get_next_addr(descr + XDES_FLST_NODE, &mtr);

		mtr_commit(&mtr);
	}

	/* Validate FSEG_NOT_FULL list */

	for (node_addr = flst_get_first(header + FSEG_NOT_FULL, mtr2);
	     !fil_addr_is_null(node_addr);
	     ) {
		mtr_start(&mtr);
		mtr_x_lock(&fsp_latch, &mtr);

		descr = xdes_lst_get_descriptor(space, node_addr, &mtr);

		ut_a(xdes_get_n_used(descr, &mtr) > 0);
		ut_a(xdes_get_n_used(descr, &mtr) < FSP_EXTENT_SIZE);
		ut_a(xdes_get_state(descr, &mtr) == XDES_FSEG);
		ut_a(0 == ut_dulint_cmp(
				mtr_read_dulint(descr + XDES_ID, MLOG_8BYTES,
								&mtr),
				seg_id));

		n_used2 += xdes_get_n_used(descr, &mtr);

		node_addr = flst_get_next_addr(descr + XDES_FLST_NODE, &mtr);

		mtr_commit(&mtr);
	}

	/* Validate FSEG_FULL list */

	for (node_addr = flst_get_first(header + FSEG_FULL, mtr2);
	     !fil_addr_is_null(node_addr);
	     ) {
		mtr_start(&mtr);
		mtr_x_lock(&fsp_latch, &mtr);

		descr = xdes_lst_get_descriptor(space, node_addr, &mtr);

		ut_a(xdes_get_n_used(descr, &mtr) == FSP_EXTENT_SIZE);
		ut_a(xdes_get_state(descr, &mtr) == XDES_FSEG);
		ut_a(0 == ut_dulint_cmp(
				mtr_read_dulint(descr + XDES_ID, MLOG_8BYTES,
								&mtr),
				seg_id));

		node_addr = flst_get_next_addr(descr + XDES_FLST_NODE, &mtr);

		mtr_commit(&mtr);
	}

	/* Validate FSEG_FRAG list */

	for (node_addr = flst_get_first(header + FSEG_FRAG, mtr2);
	     !fil_addr_is_null(node_addr);
	     ) {
		mtr_start(&mtr);
		mtr_x_lock(&fsp_latch, &mtr);

		node = fut_get_ptr_x_lock(space, node_addr, &mtr);

		/* The segment id is read from the page header of the frame
		the list node lies in */
		frame = buf_frame_align(node);
		page_header = frame + FSEG_PAGE_HEADER_OFFSET;

		ut_a(0 == ut_dulint_cmp(
				mtr_read_dulint(
					page_header + FSEG_PAGE_SEG_ID,
					MLOG_8BYTES, &mtr),
				seg_id));

		node_addr = flst_get_next_addr(node, &mtr);

		mtr_commit(&mtr);
	}

	ut_a(n_used == n_used2);

	return(TRUE);
}
/***********************************************************************
Validates a segment. X-locks fsp_latch in the given mtr and leaves the
actual checks to fseg_validate_low. */

bool
fseg_validate(
/*==========*/
				/* out: TRUE if ok */
	fseg_header_t*	header,	/* in: segment header */
	mtr_t*		mtr2)	/* in: mtr */
{
	mtr_x_lock(&fsp_latch, mtr2);

	return(fseg_validate_low(header, mtr2));
}
/***********************************************************************
Writes info of a segment to the standard output: its id, the space and page
of its header, the reserved and used page counts returned by
fseg_n_reserved_pages_low, the lengths of its four lists, and the
FSEG_NOT_FULL_N_USED counter. */
static
void
fseg_print_low(
/*===========*/
	fseg_header_t*	header,	/* in: segment header */
	mtr_t*		mtr)	/* in: mtr */
{
	ulint	space;
	ulint	page_no;
	ulint	reserved;
	ulint	used;
	ulint	seg_id_low;
	ulint	seg_id_high;
	ulint	n_used;
	ulint	n_frag;
	ulint	n_free;
	ulint	n_not_full;
	ulint	n_full;

	ut_ad(mtr_memo_contains(mtr, buf_block_align(header),
							MTR_MEMO_BUF_FIX));

	buf_page_x_lock(buf_block_align(header), mtr);

	/* Where the segment header lies */
	space = buf_page_get_space(buf_block_align(header));
	page_no = buf_page_get_offset(buf_block_align(header));

	reserved = fseg_n_reserved_pages_low(header, &used, mtr);

	/* The 8-byte segment id is printed as two halves */
	seg_id_low = ut_dulint_get_low(
			mtr_read_dulint(header + FSEG_ID, MLOG_8BYTES, mtr));
	seg_id_high = ut_dulint_get_high(
			mtr_read_dulint(header + FSEG_ID, MLOG_8BYTES, mtr));

	n_used = mtr_read_ulint(header + FSEG_NOT_FULL_N_USED, MLOG_4BYTES,
									mtr);

	n_frag = flst_get_len(header + FSEG_FRAG, mtr);
	n_free = flst_get_len(header + FSEG_FREE, mtr);
	n_not_full = flst_get_len(header + FSEG_NOT_FULL, mtr);
	n_full = flst_get_len(header + FSEG_FULL, mtr);

	printf(
    "SEGMENT id %lu %lu space %lu; page %lu; res %lu used %lu; full ext %lu\n",
		seg_id_high, seg_id_low, space, page_no, reserved, used,
		n_full);

	printf(
    "fragm pages %lu; free extents %lu; not full extents %lu: pages %lu\n",
		n_frag, n_free, n_not_full, n_used);
}
/***********************************************************************
Writes info of a segment. X-locks fsp_latch in the given mtr and leaves the
printing to fseg_print_low. */

void
fseg_print(
/*=======*/
	fseg_header_t*	header,	/* in: segment header */
	mtr_t*		mtr)	/* in: mtr */
{
	mtr_x_lock(&fsp_latch, mtr);

	fseg_print_low(header, mtr);
}
/***********************************************************************
Validates the file space system and its segments. */
bool
fsp_validate
(
/*=========*/
/* out: TRUE if ok */
ulint
space
)
/* in: space id */
{
fsp_header_t
*
header
;
fseg_header_t
*
seg_header
;
ulint
size
;
ulint
free_limit
;
ulint
frag_n_used
;
mtr_t
mtr
;
mtr_t
mtr2
;
xdes_t
*
descr
;
fil_addr_t
node_addr
;
ulint
descr_count
=
0
;
ulint
n_used
=
0
;
ulint
n_used2
=
0
;
ulint
n_full_frag_pages
;
/* Start first a mini-transaction mtr2 to lock out all other threads
from the fsp system */
mtr_start
(
&
mtr2
);
mtr_x_lock
(
&
fsp_latch
,
&
mtr2
);
mtr_start
(
&
mtr
);
mtr_x_lock
(
&
fsp_latch
,
&
mtr
);
header
=
fsp_get_space_header
(
space
,
&
mtr
);
size
=
mtr_read_ulint
(
header
+
FSP_SIZE
,
MLOG_4BYTES
,
&
mtr
);
free_limit
=
mtr_read_ulint
(
header
+
FSP_FREE_LIMIT
,
MLOG_4BYTES
,
&
mtr
);
frag_n_used
=
mtr_read_ulint
(
header
+
FSP_FRAG_N_USED
,
MLOG_4BYTES
,
&
mtr
);
n_full_frag_pages
=
FSP_EXTENT_SIZE
*
flst_get_len
(
header
+
FSP_FULL_FRAG
,
&
mtr
);
ut_a
(
free_limit
<=
size
);
flst_validate
(
header
+
FSP_FREE
,
&
mtr
);
flst_validate
(
header
+
FSP_FREE_FRAG
,
&
mtr
);
flst_validate
(
header
+
FSP_FULL_FRAG
,
&
mtr
);
flst_validate
(
header
+
FSP_SEGS
,
&
mtr
);
mtr_commit
(
&
mtr
);
/* Validate FSP_FREE list */
mtr_start
(
&
mtr
);
mtr_x_lock
(
&
fsp_latch
,
&
mtr
);
header
=
fsp_get_space_header
(
space
,
&
mtr
);
node_addr
=
flst_get_first
(
header
+
FSP_FREE
,
&
mtr
);
mtr_commit
(
&
mtr
);
while
(
!
fil_addr_is_null
(
node_addr
))
{
mtr_start
(
&
mtr
);
mtr_x_lock
(
&
fsp_latch
,
&
mtr
);
descr_count
++
;
descr
=
xdes_lst_get_descriptor
(
space
,
node_addr
,
&
mtr
);
ut_a
(
xdes_get_n_used
(
descr
,
&
mtr
)
==
0
);
ut_a
(
xdes_get_state
(
descr
,
&
mtr
)
==
XDES_FREE
);
node_addr
=
flst_get_next_addr
(
descr
+
XDES_FLST_NODE
,
&
mtr
);
mtr_commit
(
&
mtr
);
}
/* Validate FSP_FREE_FRAG list */
mtr_start
(
&
mtr
);
mtr_x_lock
(
&
fsp_latch
,
&
mtr
);
header
=
fsp_get_space_header
(
space
,
&
mtr
);
node_addr
=
flst_get_first
(
header
+
FSP_FREE_FRAG
,
&
mtr
);
mtr_commit
(
&
mtr
);
while
(
!
fil_addr_is_null
(
node_addr
))
{
mtr_start
(
&
mtr
);
mtr_x_lock
(
&
fsp_latch
,
&
mtr
);
descr_count
++
;
descr
=
xdes_lst_get_descriptor
(
space
,
node_addr
,
&
mtr
);
ut_a
(
xdes_get_n_used
(
descr
,
&
mtr
)
>
0
);
ut_a
(
xdes_get_n_used
(
descr
,
&
mtr
)
<
FSP_EXTENT_SIZE
);
ut_a
(
xdes_get_state
(
descr
,
&
mtr
)
==
XDES_FREE_FRAG
);
n_used
+=
xdes_get_n_used
(
descr
,
&
mtr
);
node_addr
=
flst_get_next_addr
(
descr
+
XDES_FLST_NODE
,
&
mtr
);
mtr_commit
(
&
mtr
);
}
/* Validate FSP_FULL_FRAG list */
mtr_start
(
&
mtr
);
mtr_x_lock
(
&
fsp_latch
,
&
mtr
);
header
=
fsp_get_space_header
(
space
,
&
mtr
);
node_addr
=
flst_get_first
(
header
+
FSP_FULL_FRAG
,
&
mtr
);
mtr_commit
(
&
mtr
);
while
(
!
fil_addr_is_null
(
node_addr
))
{
mtr_start
(
&
mtr
);
mtr_x_lock
(
&
fsp_latch
,
&
mtr
);
descr_count
++
;
descr
=
xdes_lst_get_descriptor
(
space
,
node_addr
,
&
mtr
);
ut_a
(
xdes_get_n_used
(
descr
,
&
mtr
)
==
FSP_EXTENT_SIZE
);
ut_a
(
xdes_get_state
(
descr
,
&
mtr
)
==
XDES_FULL_FRAG
);
node_addr
=
flst_get_next_addr
(
descr
+
XDES_FLST_NODE
,
&
mtr
);
mtr_commit
(
&
mtr
);
}
/* Validate segments */
mtr_start
(
&
mtr
);
mtr_x_lock
(
&
fsp_latch
,
&
mtr
);
header
=
fsp_get_space_header
(
space
,
&
mtr
);
node_addr
=
flst_get_first
(
header
+
FSP_SEGS
,
&
mtr
);
mtr_commit
(
&
mtr
);
while
(
!
fil_addr_is_null
(
node_addr
))
{
mtr_start
(
&
mtr
);
mtr_x_lock
(
&
fsp_latch
,
&
mtr
);
seg_header
=
fut_get_ptr_x_lock
(
space
,
node_addr
,
&
mtr
)
-
FSEG_FLST_NODE
;
fseg_validate_low
(
seg_header
,
&
mtr
);
descr_count
+=
flst_get_len
(
seg_header
+
FSEG_FREE
,
&
mtr
);
descr_count
+=
flst_get_len
(
seg_header
+
FSEG_FULL
,
&
mtr
);
descr_count
+=
flst_get_len
(
seg_header
+
FSEG_NOT_FULL
,
&
mtr
);
n_used2
+=
flst_get_len
(
seg_header
+
FSEG_FRAG
,
&
mtr
);
node_addr
=
flst_get_next_addr
(
seg_header
+
FSEG_FLST_NODE
,
&
mtr
);
mtr_commit
(
&
mtr
);
}
ut_a
(
descr_count
*
FSP_EXTENT_SIZE
==
free_limit
);
ut_a
(
n_used
+
n_full_frag_pages
==
n_used2
+
(
free_limit
+
XDES_DESCRIBED_PER_PAGE
-
1
)
/
XDES_DESCRIBED_PER_PAGE
);
ut_a
(
frag_n_used
==
n_used
);
mtr_commit
(
&
mtr2
);
return
(
TRUE
);
}
/***********************************************************************
Prints info of a file space to the standard output: the size, the free limit,
the lengths of the lists in the space header, the used page counter of the
fragment extents and the first unused segment id, followed by the info of
each segment as written by fseg_print_low. */

void
fsp_print(
/*======*/
	ulint	space)	/* in: space id */
{
	fsp_header_t*	header;
	fseg_header_t*	seg_header;
	fil_addr_t	node_addr;
	mtr_t		mtr;
	mtr_t		mtr2;
	ulint		size;
	ulint		free_limit;
	ulint		frag_n_used;
	ulint		n_free;
	ulint		n_free_frag;
	ulint		n_full_frag;
	ulint		n_segs;
	ulint		seg_id_low;
	ulint		seg_id_high;

	/* Start first a mini-transaction mtr2 to lock out all other threads
	from the fsp system */
	mtr_start(&mtr2);
	mtr_x_lock(&fsp_latch, &mtr2);

	mtr_start(&mtr);
	mtr_x_lock(&fsp_latch, &mtr);

	header = fsp_get_space_header(space, &mtr);

	/* Fields of the space header */
	size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, &mtr);
	free_limit = mtr_read_ulint(header + FSP_FREE_LIMIT, MLOG_4BYTES,
									&mtr);
	frag_n_used = mtr_read_ulint(header + FSP_FRAG_N_USED, MLOG_4BYTES,
									&mtr);

	/* Lengths of the lists based in the space header */
	n_free = flst_get_len(header + FSP_FREE, &mtr);
	n_free_frag = flst_get_len(header + FSP_FREE_FRAG, &mtr);
	n_full_frag = flst_get_len(header + FSP_FULL_FRAG, &mtr);
	n_segs = flst_get_len(header + FSP_SEGS, &mtr);

	/* The 8-byte segment id is printed as two halves */
	seg_id_low = ut_dulint_get_low(
		mtr_read_dulint(header + FSP_SEG_ID, MLOG_8BYTES, &mtr));
	seg_id_high = ut_dulint_get_high(
		mtr_read_dulint(header + FSP_SEG_ID, MLOG_8BYTES, &mtr));

	printf("FILE SPACE INFO: id %lu\n", space);

	printf("size %lu, free limit %lu, free extents %lu\n",
		size, free_limit, n_free);

	printf(
	"not full frag extents %lu: used pages %lu, full frag extents %lu\n",
		n_free_frag, frag_n_used, n_full_frag);

	printf("number of segments %lu, first seg id not used %lu %lu\n",
		n_segs, seg_id_high, seg_id_low);

	/* Print segments */

	node_addr = flst_get_first(header + FSP_SEGS, &mtr);

	mtr_commit(&mtr);

	for (; !fil_addr_is_null(node_addr); ) {
		/* Each segment is printed in a mini-transaction of its
		own */
		mtr_start(&mtr);
		mtr_x_lock(&fsp_latch, &mtr);

		/* The list node lies FSEG_FLST_NODE bytes into the segment
		header */
		seg_header = fut_get_ptr_x_lock(space, node_addr, &mtr)
							- FSEG_FLST_NODE;

		fseg_print_low(seg_header, &mtr);

		node_addr = flst_get_next_addr(seg_header + FSEG_FLST_NODE,
									&mtr);
		mtr_commit(&mtr);
	}

	mtr_commit(&mtr2);
}
innobase/include/usr0sess.h
View file @
ec06c782
...
@@ -66,28 +66,6 @@ sess_open(
...
@@ -66,28 +66,6 @@ sess_open(
byte
*
addr_buf
,
/* in: client address */
byte
*
addr_buf
,
/* in: client address */
ulint
addr_len
);
/* in: client address length */
ulint
addr_len
);
/* in: client address length */
/*************************************************************************
/*************************************************************************
Closes a session, freeing the memory occupied by it. */
void
sess_close
(
/*=======*/
sess_t
*
sess
);
/* in, own: session object */
/*************************************************************************
Raises an SQL error. */
void
sess_raise_error_low
(
/*=================*/
trx_t
*
trx
,
/* in: transaction */
ulint
err_no
,
/* in: error number */
ulint
type
,
/* in: more info of the error, or 0 */
dict_table_t
*
table
,
/* in: dictionary table or NULL */
dict_index_t
*
index
,
/* in: table index or NULL */
dtuple_t
*
tuple
,
/* in: tuple to insert or NULL */
rec_t
*
rec
,
/* in: record or NULL */
char
*
err_str
);
/* in: arbitrary null-terminated error string,
or NULL */
/*************************************************************************
Closes a session, freeing the memory occupied by it, if it is in a state
Closes a session, freeing the memory occupied by it, if it is in a state
where it should be closed. */
where it should be closed. */
...
@@ -117,16 +95,6 @@ sess_srv_msg_send_simple(
...
@@ -117,16 +95,6 @@ sess_srv_msg_send_simple(
ulint
rel_kernel
);
/* in: SESS_RELEASE_KERNEL or
ulint
rel_kernel
);
/* in: SESS_RELEASE_KERNEL or
SESS_NOT_RELEASE_KERNEL */
SESS_NOT_RELEASE_KERNEL */
/***************************************************************************
/***************************************************************************
Processes a message from a client. NOTE: May release the kernel mutex
temporarily. */
void
sess_receive_msg_rel_kernel
(
/*========================*/
sess_t
*
sess
,
/* in: session */
byte
*
str
,
/* in: message string */
ulint
len
);
/* in: message length */
/***************************************************************************
When a command has been completed, this function sends the message about it
When a command has been completed, this function sends the message about it
to the client. */
to the client. */
...
@@ -136,17 +104,6 @@ sess_command_completed_message(
...
@@ -136,17 +104,6 @@ sess_command_completed_message(
sess_t
*
sess
,
/* in: session */
sess_t
*
sess
,
/* in: session */
byte
*
msg
,
/* in: message buffer */
byte
*
msg
,
/* in: message buffer */
ulint
len
);
/* in: message data length */
ulint
len
);
/* in: message data length */
/***********************************************************************
Starts a new connection and a session, or starts a query based on a client
message. This is called by a SRV_COM thread. */
void
sess_process_cli_msg
(
/*=================*/
byte
*
str
,
/* in: message string */
ulint
len
,
/* in: string length */
byte
*
addr
,
/* in: address string */
ulint
alen
);
/* in: address length */
/* The session handle. All fields are protected by the kernel mutex */
/* The session handle. All fields are protected by the kernel mutex */
...
...
innobase/lock/lock0lock.c
View file @
ec06c782
...
@@ -3063,11 +3063,6 @@ retry:
...
@@ -3063,11 +3063,6 @@ retry:
ut_a
(
strlen
(
lock_latest_err_buf
)
<
4100
);
ut_a
(
strlen
(
lock_latest_err_buf
)
<
4100
);
/*
sess_raise_error_low(trx, DB_DEADLOCK, lock->type_mode, table,
index, NULL, NULL, NULL);
*/
return
(
TRUE
);
return
(
TRUE
);
}
}
...
...
innobase/log/trash/log0trsh.c
deleted
100644 → 0
View file @
38b46781
/******************************************************
Recovery
(c) 1997 Innobase Oy
Created 9/20/1997 Heikki Tuuri
*******************************************************/
#include "log0recv.h"
#ifdef UNIV_NONINL
#include "log0recv.ic"
#endif
#include "mem0mem.h"
#include "buf0buf.h"
#include "buf0flu.h"
#include "srv0srv.h"
/* Size of block reads when the log groups are scanned forward to do
roll-forward */
#define RECV_SCAN_SIZE (4 * UNIV_PAGE_SIZE)
/* Size of block reads when the log groups are scanned backwards to synchronize
them */
#define RECV_BACK_SCAN_SIZE (4 * UNIV_PAGE_SIZE)
/* The recovery system object; it stays NULL until recv_sys_create()
allocates it */
recv_sys_t
*
recv_sys
=
NULL
;
/* NOTE(review): the call below sits at file scope, outside any function,
and `block` is not declared anywhere in view. It looks like a leftover
fragment and cannot compile as written -- confirm before reusing it. */
recv_recover_page
(
block
->
frame
,
block
->
space
,
block
->
offset
);
/************************************************************
Creates the recovery system: allocates the global recv_sys object and
creates its mutex. The hash table and the heap are left NULL. */

void
recv_sys_create(void)
/*=================*/
{
	/* The recovery system may be created only once */
	ut_a(recv_sys == NULL);

	/* Take the size from the pointer itself so that the allocation
	always matches the object recv_sys points to; sizeof(recv_t) named
	a different type than recv_sys_t */
	recv_sys = mem_alloc(sizeof(*recv_sys));

	mutex_create(&(recv_sys->mutex));

	recv_sys->hash = NULL;
	recv_sys->heap = NULL;
}
/************************************************************
Prepares the recovery system for a recovery operation: creates the hash
table and the memory heap of recv_sys. */

void
recv_sys_init(void)
/*===============*/
{
	/* The hash table is sized as the current buffer pool size
	divided by 64 */
	recv_sys->hash = hash_create(buf_pool_get_curr_size() / 64);

	recv_sys->heap = mem_heap_create_in_buffer(256);
}
/************************************************************
Frees the hash table and the heap of the recovery system and resets both
fields to NULL. The work is done while holding the recv_sys mutex. */

void
recv_sys_empty(void)
/*================*/
{
	mutex_enter(&(recv_sys->mutex));

	hash_free(recv_sys->hash);
	mem_heap_free(recv_sys->heap);

	/* Leave no dangling pointers behind */
	recv_sys->hash = NULL;
	recv_sys->heap = NULL;

	mutex_exit(&(recv_sys->mutex));
}
/***********************************************************
For recovery purposes writes the contents of the log buffer to a log
group, so that the group gets synchronized log data. The caller must own
the log_sys mutex. */
static
void
recv_log_buf_flush(
/*===============*/
	log_group_t*	group,		/* in: log group */
	dulint		start_lsn,	/* in: start lsn of the log data in
					the log buffer; must be divisible by
					OS_FILE_LOG_BLOCK_SIZE */
	dulint		end_lsn)	/* in: end lsn of the log data in the
					log buffer; must be divisible by
					OS_FILE_LOG_BLOCK_SIZE */
{
	ulint	n_bytes;

	ut_ad(mutex_own(&(log_sys->mutex)));

	/* Number of bytes between the two lsn's */
	n_bytes = ut_dulint_minus(end_lsn, start_lsn);

	log_group_write_buf(LOG_RECOVER, group, log_sys->buf, n_bytes,
							start_lsn, 0);
}
/***********************************************************
Compares two buffers containing log segments, scanning the log blocks
from the end of the buffers towards the start, and determines the highest
lsn where they match, if any. */
static
dulint
recv_log_bufs_cmp(
/*==============*/
				/* out: if no match found, ut_dulint_zero or
				if start_lsn == LOG_START_LSN, returns
				LOG_START_LSN; otherwise the highest matching
				lsn */
	byte*	recv_buf,	/* in: buffer containing valid log data */
	byte*	buf,		/* in: buffer of data from a possibly
				incompletely written log group */
	dulint	start_lsn,	/* in: buffer start lsn, must be divisible
				by OS_FILE_LOG_BLOCK_SIZE and must be >=
				LOG_START_LSN */
	dulint	end_lsn,	/* in: buffer end lsn, must be divisible
				by OS_FILE_LOG_BLOCK_SIZE */
	dulint	recovered_lsn)	/* in: recovery succeeded up to this lsn */
{
	ulint	len;
	ulint	offset;
	byte*	valid_block;
	byte*	cand_block;
	ulint	block_no;
	ulint	data_len;

	ut_ad(ut_dulint_cmp(start_lsn, LOG_START_LSN) >= 0);

	/* Never compare beyond the block that contains recovered_lsn */
	if (ut_dulint_cmp(end_lsn, recovered_lsn) > 0) {
		end_lsn = ut_dulint_align_up(recovered_lsn,
						OS_FILE_LOG_BLOCK_SIZE);
	}

	len = ut_dulint_minus(end_lsn, start_lsn);

	if (len != 0) {
		ut_ad(len % OS_FILE_LOG_BLOCK_SIZE == 0);

		/* Both cursors start one past the last block and are
		stepped backwards one block per iteration */
		valid_block = recv_buf + len;
		cand_block = buf + len;

		do {
			valid_block -= OS_FILE_LOG_BLOCK_SIZE;
			cand_block -= OS_FILE_LOG_BLOCK_SIZE;

			block_no = log_block_get_hdr_no(valid_block);

			/* The valid buffer must carry the same block number
			in the block header and in the trailer */
			ut_a(block_no == log_block_get_trl_no(valid_block));

			if (block_no == log_block_get_hdr_no(cand_block)
			    && block_no == log_block_get_trl_no(cand_block)) {

				/* The block numbers agree: it is a match if
				the data of the block is not corrupted */
				data_len = log_block_get_data_len(cand_block);

				if (0 == ut_memcmp(
						valid_block + LOG_BLOCK_DATA,
						cand_block + LOG_BLOCK_DATA,
						data_len - LOG_BLOCK_DATA)) {

					/* Match found */
					return(ut_dulint_add(start_lsn,
						cand_block - buf + data_len));
				}
			}
		} while (valid_block != recv_buf);
	}

	/* No match found */
	if (ut_dulint_cmp(start_lsn, LOG_START_LSN) == 0) {

		return(LOG_START_LSN);
	}

	return(ut_dulint_zero);
}
/************************************************************
Copies a log segment from the most up-to-date log group to the other log
group, so that it contains the latest log data. The groups are compared
backwards from recovered_lsn in pieces of RECV_BACK_SCAN_SIZE bytes; each
piece that does not match up to recovered_lsn is written to the target
group, and the scan stops at the first piece where a match is reported.
The caller must own the log_sys mutex. */
static
void
recv_copy_group(
/*============*/
	log_group_t*	up_to_date_group,	/* in: the most up-to-date
						log group */
	log_group_t*	group,			/* in: copy to this log group */
	dulint_lsn	recovered_lsn)		/* in: recovery succeeded up
						to this lsn */
	/* NOTE(review): the type name dulint_lsn is not used by any other
	function in view, which all take a dulint lsn -- presumably a typo
	for dulint; confirm against the headers */
{
	dulint	start_lsn;
	dulint	end_lsn;
	dulint	match;
	byte*	buf;
	byte*	buf1;

	ut_ad(mutex_own(&(log_sys->mutex)));

	if (0 == ut_dulint_cmp(LOG_START_LSN, recovered_lsn)) {

		/* Nothing has been recovered: nothing to copy */
		return;
	}

	ut_ad(RECV_BACK_SCAN_SIZE <= log_sys->buf_size);

	/* Allocate twice the scan size so that an aligned window of
	RECV_BACK_SCAN_SIZE bytes fits inside the allocation. The aligned
	pointer must be derived from the fresh allocation buf1; the old
	code aligned the still uninitialized buf. */
	buf1 = mem_alloc(2 * RECV_BACK_SCAN_SIZE);
	buf = ut_align(buf1, RECV_BACK_SCAN_SIZE);

	end_lsn = ut_dulint_align_up(recovered_lsn, RECV_BACK_SCAN_SIZE);

	match = ut_dulint_zero;

	for (;;) {
		/* Step one scan window backwards, but never below
		LOG_START_LSN */
		if (ut_dulint_cmp(ut_dulint_add(LOG_START_LSN,
						RECV_BACK_SCAN_SIZE),
				  end_lsn) >= 0) {
			start_lsn = LOG_START_LSN;
		} else {
			start_lsn = ut_dulint_subtract(end_lsn,
						RECV_BACK_SCAN_SIZE);
		}

		log_group_read_log_seg(LOG_RECOVER, buf, group, start_lsn,
								end_lsn);
		log_group_read_log_seg(LOG_RECOVER, log_sys->buf,
					up_to_date_group, start_lsn, end_lsn);

		match = recv_log_bufs_cmp(log_sys->buf, buf, start_lsn,
						end_lsn, recovered_lsn);

		if (ut_dulint_cmp(match, recovered_lsn) != 0) {
			/* The target group differs in this window: overwrite
			the window with the data read to the log buffer */
			recv_log_buf_flush(group, start_lsn, end_lsn);
		}

		/* ut_dulint_zero is a value, not a function: test for a
		non-zero match with an explicit comparison */
		if (ut_dulint_cmp(match, ut_dulint_zero) != 0) {

			mem_free(buf1);

			return;
		}

		end_lsn = start_lsn;
	}
}
/************************************************************
Copies a log segment from the most up-to-date log group to the other log
groups, so that they all contain the latest log data. Also writes the info
about the latest checkpoint to the groups, and inits the fields in the group
memory structs to up-to-date values. The caller must own the log_sys mutex;
the mutex is released temporarily while a checkpoint write is waited for. */

void
recv_synchronize_groups(
/*====================*/
	log_group_t*	up_to_date_group,	/* in: the most up-to-date
						log group */
	dulint_lsn	recovered_lsn,		/* in: recovery succeeded up
						to this lsn */
	log_group_t*	max_checkpoint_group)	/* in: the group with the most
						recent checkpoint info */
{
	log_group_t*	group;

	ut_ad(mutex_own(&(log_sys->mutex)));

	for (group = UT_LIST_GET_FIRST(log_sys->log_groups);
	     group;
	     group = UT_LIST_GET_NEXT(log_groups, group)) {

		if (group != up_to_date_group) {

			/* Copy log data. recv_copy_group() takes the source
			(up-to-date) group first and the target second; the
			old call passed them the other way round, which
			would have copied stale data over the up-to-date
			group. */
			recv_copy_group(up_to_date_group, group,
							recovered_lsn);
		}

		if (group != max_checkpoint_group) {

			/* Copy the checkpoint info to the group */
			log_group_checkpoint(group);

			mutex_exit(&(log_sys->mutex));

			/* Wait for the checkpoint write to complete */
			rw_lock_s_lock(&(log_sys->checkpoint_lock));
			rw_lock_s_unlock(&(log_sys->checkpoint_lock));

			mutex_enter(&(log_sys->mutex));
		}

		/* Update the fields in the group struct to correspond to
		recovered_lsn */
		log_group_set_fields(group, recovered_lsn);
	}
}
/************************************************************
Looks for the maximum consistent checkpoint from the log groups. Both
checkpoint fields of every group are read and their two checksums are
verified; a group is marked LOG_GROUP_OK if at least one of its fields is
consistent, otherwise it stays LOG_GROUP_CORRUPTED. The caller must own
the log_sys mutex. */
static
ulint
recv_find_max_checkpoint(
/*=====================*/
					/* out: DB_SUCCESS, or DB_ERROR if no
					consistent checkpoint was found */
	log_group_t**	max_group,	/* out: max group */
	ulint*		max_field)	/* out: LOG_CHECKPOINT_1 or
					LOG_CHECKPOINT_2 */
{
	log_group_t*	group;
	dulint		max_no;
	dulint		cp_no;
	ulint		field;
	ulint		fold;
	byte*		buf;

	ut_ad(mutex_own(&(log_sys->mutex)));

	/* Start from zero so that the first consistent checkpoint always
	qualifies; max_no was previously compared before it was ever
	assigned, and the undeclared checkpoint_no and checkpoint_lsn were
	zeroed in its place */
	max_no = ut_dulint_zero;
	*max_group = NULL;

	buf = log_sys->checkpoint_buf;

	/* Look for the latest checkpoint from the log groups */
	for (group = UT_LIST_GET_FIRST(log_sys->log_groups);
	     group;
	     group = UT_LIST_GET_NEXT(log_groups, group)) {

		group->state = LOG_GROUP_CORRUPTED;

		for (field = LOG_CHECKPOINT_1; field <= LOG_CHECKPOINT_2;
		     field += LOG_CHECKPOINT_2 - LOG_CHECKPOINT_1) {

			log_group_read_checkpoint_info(group, field);

			/* Check the consistency of the checkpoint info:
			an inconsistent field is skipped */
			fold = ut_fold_binary(buf, LOG_CHECKPOINT_CHECKSUM_1);

			if (fold != mach_read_from_4(buf
					+ LOG_CHECKPOINT_CHECKSUM_1)) {
				continue;
			}

			fold = ut_fold_binary(buf + LOG_CHECKPOINT_LSN,
					LOG_CHECKPOINT_CHECKSUM_2
					- LOG_CHECKPOINT_LSN);

			if (fold != mach_read_from_4(buf
					+ LOG_CHECKPOINT_CHECKSUM_2)) {
				continue;
			}

			group->state = LOG_GROUP_OK;

			group->lsn = mach_read_from_8(buf
						+ LOG_CHECKPOINT_LSN);
			group->lsn_offset = mach_read_from_4(buf
						+ LOG_CHECKPOINT_OFFSET);
			group->lsn_file_count = mach_read_from_4(buf
						+ LOG_CHECKPOINT_FILE_COUNT);

			cp_no = mach_read_from_8(buf + LOG_CHECKPOINT_NO);

			if (ut_dulint_cmp(cp_no, max_no) >= 0) {
				*max_group = group;
				*max_field = field;
				max_no = cp_no;
			}
		}
	}

	if (*max_group == NULL) {

		return(DB_ERROR);
	}

	return(DB_SUCCESS);
}
/***********************************************************
Parses log records from a buffer and stores them to a hash table to wait
for merging to file pages. If the hash table becomes too big, it is merged
automatically to file pages.
NOTE(review): the body below is empty, so none of the above is implemented
and no value is returned although the return type is bool. The only caller
in view, recv_recovery_from_checkpoint_start(), uses the return value and
passes six arguments where five parameters are declared here -- treat this
function as an unfinished stub. */
static
bool
recv_parse_and_hash_log_recs
(
/*=========================*/
/* out: TRUE if limit_lsn has been reached */
byte
*
buf
,
/* in: buffer containing a log segment or
garbage */
ulint
len
,
/* in: buffer length */
dulint
start_lsn
,
/* in: buffer start lsn */
dulint
limit_lsn
,
/* in: recover at least to this lsn */
dulint
*
recovered_lsn
)
/* out: was able to parse up to this lsn */
{
/* not implemented: see the note in the function comment */
}
/************************************************************
Recovers from a checkpoint. When this function returns, the database is able
to start processing new user transactions, but the function
recv_recovery_from_checkpoint_finish should be called later to complete
the recovery and free the resources used in it. The log_sys mutex is
acquired here and released on every return path. */

ulint
recv_recovery_from_checkpoint_start(
/*================================*/
				/* out: error code or DB_SUCCESS */
	dulint	limit_lsn)	/* in: recover up to this lsn if possible */
{
	log_group_t*	group;
	log_group_t*	max_cp_group;
	log_group_t*	up_to_date_group;
	ulint		max_cp_field;
	byte*		buf;
	ulint		err;
	dulint		checkpoint_lsn;
	dulint		checkpoint_no;
	dulint		recovered_lsn;
	dulint		old_lsn;
	dulint		end_lsn;
	dulint		start_lsn;
	bool		finished;
	dulint		flush_start_lsn;

	mutex_enter(&(log_sys->mutex));

	/* Look for the latest checkpoint from any of the log groups */

	err = recv_find_max_checkpoint(&max_cp_group, &max_cp_field);

	if (err != DB_SUCCESS) {
		mutex_exit(&(log_sys->mutex));

		return(err);
	}

	log_group_read_checkpoint_info(max_cp_group, max_cp_field);

	buf = log_sys->checkpoint_buf;

	checkpoint_lsn = mach_read_from_8(buf + LOG_CHECKPOINT_LSN);
	checkpoint_no = mach_read_from_8(buf + LOG_CHECKPOINT_NO);

	if (ut_dulint_cmp(limit_lsn, checkpoint_lsn) < 0) {
		/* The requested limit lies before the latest checkpoint */
		mutex_exit(&(log_sys->mutex));

		return(DB_ERROR);
	}

	/* Start reading the log groups from the checkpoint lsn up. The
	variable flush_start_lsn tells a lsn up to which the log is known
	to be contiguously written in all log groups. */

	recovered_lsn = checkpoint_lsn;
	flush_start_lsn = ut_dulint_align_down(checkpoint_lsn,
						OS_FILE_LOG_BLOCK_SIZE);
	up_to_date_group = max_cp_group;

	ut_ad(RECV_SCAN_SIZE <= log_sys->buf_size);

	for (group = UT_LIST_GET_FIRST(log_sys->log_groups);
	     group;
	     group = UT_LIST_GET_NEXT(log_groups, group)) {

		/* A corrupted group is not scanned at all */
		finished = FALSE;

		if (group->state == LOG_GROUP_CORRUPTED) {
			finished = TRUE;
		}

		start_lsn = flush_start_lsn;

		while (!finished) {
			end_lsn = ut_dulint_add(start_lsn, RECV_SCAN_SIZE);

			log_group_read_log_seg(LOG_RECOVER, log_sys->buf,
						group, start_lsn, end_lsn);
			old_lsn = recovered_lsn;

			/* NOTE(review): six arguments are passed here, but
			the definition of recv_parse_and_hash_log_recs() in
			this file declares five parameters (there is no
			flush_start_lsn one) and has an empty body; the
			signature must be settled before this can compile */
			finished = recv_parse_and_hash_log_recs(log_sys->buf,
						RECV_SCAN_SIZE, start_lsn,
						limit_lsn, &flush_start_lsn,
						&recovered_lsn);

			if (ut_dulint_cmp(recovered_lsn, old_lsn) > 0) {

				/* We found a more up-to-date group */
				up_to_date_group = group;
			}

			start_lsn = end_lsn;
		}
	}

	/* Delete possible corrupted or extra log records from all log
	groups */

	recv_truncate_groups(recovered_lsn);

	/* Synchronize the uncorrupted log groups to the most up-to-date log
	group; we may also have to copy checkpoint info to groups */

	log_sys->next_checkpoint_lsn = checkpoint_lsn;
	log_sys->next_checkpoint_no = checkpoint_no;

	/* The second argument is the lsn up to which recovery succeeded;
	the old code passed the undeclared name _lsn here */
	recv_synchronize_groups(up_to_date_group, recovered_lsn,
							max_cp_group);

	log_sys->next_checkpoint_no = ut_dulint_add(checkpoint_no, 1);

	/* Release the log mutex also on the success path, as the error
	paths above do; it was previously left locked on return */
	mutex_exit(&(log_sys->mutex));

	/* The database is now ready to start almost normal processing of user
	transactions */

	return(DB_SUCCESS);
}
/************************************************************
Completes recovery from a checkpoint in three steps: rollback, merge of
the hashed log records, and release of the recovery resources. */

void
recv_recovery_from_checkpoint_finish(void)
/*======================================*/
{
	/* Step 1: roll back the uncommitted transactions which have no
	user session */
	trx_rollback_all_without_sess();

	/* Step 2: merge the hashed log records */
	recv_merge_hashed_log_recs();

	/* Step 3: free the resources of the recovery system */
	recv_sys_empty();
}
/****************************************************************
Writes to the log a record about incrementing the row id counter. The
one-byte record is stored directly to the log buffer when it fits in the
current log block; otherwise the work is handed over to
log_write_row_id_incr_rec_slow(). */
UNIV_INLINE
void
log_write_row_id_incr_rec(void)
/*===========================*/
{
	log_t*	sys	= log_sys;
	ulint	new_data_len;

	mutex_enter(&(sys->mutex));

	/* Data length of the current block after one more byte */
	new_data_len = (sys->buf_free % OS_FILE_LOG_BLOCK_SIZE) + 1;

	if (new_data_len >= OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE) {

		/* The string does not fit within the current log block
		or the block would become full */

		mutex_exit(&(sys->mutex));

		log_write_row_id_incr_rec_slow();

	} else {
		sys->buf[sys->buf_free] = MLOG_INCR_ROW_ID
						| MLOG_SINGLE_REC_FLAG;

		log_block_set_data_len(
			ut_align_down(sys->buf + sys->buf_free,
						OS_FILE_LOG_BLOCK_SIZE),
			new_data_len);
#ifdef UNIV_LOG_DEBUG
		sys->old_buf_free = sys->buf_free;
		sys->old_lsn = sys->lsn;
		log_check_log_recs(sys->buf + sys->buf_free, 1, sys->lsn);
#endif
		/* One byte was appended: advance the free offset and
		the lsn by one */
		sys->buf_free += 1;

		ut_ad(sys->buf_free <= sys->buf_size);

		UT_DULINT_INC(sys->lsn);

		mutex_exit(&(sys->mutex));
	}
}
/****************************************************************
Writes to the log a record about incrementing the row id counter, going
through the general reserve / write / close / release sequence of the
log. */
static
void
log_write_row_id_incr_rec_slow(void)
/*================================*/
{
	byte	rec_type;

	/* The record is the single type byte */
	rec_type = MLOG_INCR_ROW_ID | MLOG_SINGLE_REC_FLAG;

	log_reserve_and_open(1);

	log_write_low(&rec_type, 1);

	log_close();

	log_release();
}
/**************************************************************************
Parses a log record MLOG_SET_ROW_ID and, if a page is given, applies it
by storing the parsed value to the row id field of the dictionary header
on the page. */

byte*
dict_hdr_parse_set_row_id(
/*======================*/
			/* out: end of log record or NULL */
	byte*	ptr,	/* in: buffer */
	byte*	end_ptr,/* in: buffer end */
	page_t*	page)	/* in: page or NULL */
{
	dulint	row_id;

	ptr = mach_dulint_parse_compressed(ptr, end_ptr, &row_id);

	/* Apply the record only if the parse succeeded and there is a page
	to apply it to; in every case the parse result is returned as is */
	if (ptr != NULL && page) {
		mach_write_to_8(page + DICT_HDR + DICT_HDR_ROW_ID, row_id);
	}

	return(ptr);
}
innobase/os/os0fileold.c
deleted
100644 → 0
View file @
38b46781
/******************************************************
The interface to the operating system file i/o primitives
(c) 1995 Innobase Oy
Created 10/21/1995 Heikki Tuuri
*******************************************************/
#include "os0file.h"
#include "os0sync.h"
#include "ut0mem.h"
#ifndef __WIN__
#include <errno.h>
#endif
/* If the OS does not provide an atomic pread or pwrite, or similar, the
lseek + file i/o operation pair is protected with one of these mutexes */

#define OS_FILE_N_SEEK_MUTEXES	16

os_mutex_t	os_file_seek_mutexes[OS_FILE_N_SEEK_MUTEXES];

/* Upper limit for the number of consecutive i/os merged in simulated aio */

#define OS_AIO_MERGE_N_CONSECUTIVE	32

/* TRUE: use the native aio of the OS (provided Innobase was compiled with
it in); FALSE: use the simulated aio built below with threads */

bool	os_aio_use_native_aio	= FALSE;
/* One slot of an aio array: describes a single aio request */
typedef struct os_aio_slot_struct	os_aio_slot_t;

struct os_aio_slot_struct{
	bool		is_read;	/* TRUE if a read operation */
	ulint		pos;		/* index of the slot in the aio
					array */
	bool		reserved;	/* TRUE if this slot is reserved */
	ulint		len;		/* length of the block to read or
					write */
	byte*		buf;		/* buffer used in i/o */
	ulint		type;		/* OS_FILE_READ or OS_FILE_WRITE */
	ulint		offset;		/* 32 low bits of file offset in
					bytes */
	ulint		offset_high;	/* 32 high bits of file offset */
	os_file_t	file;		/* file where to read or write */
	char*		name;		/* file name or path */
	bool		io_already_done;/* used only in simulated aio:
					TRUE if the physical i/o already
					made and only the slot message
					needs to be passed to the caller
					of os_aio_simulated_handle */
	void*		message1;	/* message1 and message2 are given
					by the requester of an aio */
	void*		message2;	/* operation; they can be used to
					identify which pending aio operation
					was completed */
#ifdef WIN_ASYNC_IO
	OVERLAPPED	control;	/* Windows control block for the
					aio request */
#elif defined(POSIX_ASYNC_IO)
	struct aiocb	control;	/* Posix control block for aio
					request */
#endif
};
/* An aio array: a set of aio slots divided into segments */
typedef struct os_aio_array_struct	os_aio_array_t;

struct os_aio_array_struct{
	os_mutex_t	mutex;		/* the mutex protecting the aio
					array */
	os_event_t	not_full;	/* The event which is set to signaled
					state when there is space in the aio
					outside the ibuf segment */
	ulint		n_slots;	/* Total number of slots in the aio
					array. This must be divisible by
					n_threads. */
	ulint		n_segments;	/* Number of segments in the aio
					array of pending aio requests. A
					thread can wait separately for any
					one of the segments. */
	ulint		n_reserved;	/* Number of reserved slots in the
					aio array outside the ibuf segment */
	os_aio_slot_t*	slots;		/* Pointer to the slots in the
					array */
	os_event_t*	events;		/* Pointer to an array of event
					handles where we copied the handles
					from slots, in the same order. This
					can be used in WaitForMultipleObjects;
					used only in Windows */
};
/* Array of events used in simulated aio */
os_event_t*	os_aio_segment_wait_events	= NULL;

/* The aio arrays for non-ibuf i/o and ibuf i/o, as well as sync aio. These
are NULL when the module has not yet been initialized. */
os_aio_array_t*	os_aio_read_array	= NULL;
os_aio_array_t*	os_aio_write_array	= NULL;
os_aio_array_t*	os_aio_ibuf_array	= NULL;
os_aio_array_t*	os_aio_log_array	= NULL;
os_aio_array_t*	os_aio_sync_array	= NULL;

/* ULINT_UNDEFINED until a real value is assigned */
ulint	os_aio_n_segments	= ULINT_UNDEFINED;
/***************************************************************************
Retrieves the last error number if an error occurs in a file io function,
translated to an OS_FILE_... code where this program knows the OS error.
The number should be retrieved before any other OS calls (because they may
overwrite the error number). If the number is not known to this program,
the OS error number + 100 is returned. */

ulint
os_file_get_last_error(void)
/*========================*/
		/* out: error number, or OS error number + 100 */
{
	ulint	err;

#ifdef __WIN__

	err = (ulint) GetLastError();

	switch (err) {
	case ERROR_FILE_NOT_FOUND:
		return(OS_FILE_NOT_FOUND);
	case ERROR_DISK_FULL:
		return(OS_FILE_DISK_FULL);
	case ERROR_FILE_EXISTS:
		return(OS_FILE_ALREADY_EXISTS);
	default:
		return(100 + err);
	}
#else
	err = (ulint) errno;

	/* Print the raw error number and its text */
	printf("%lu\n", err);
	perror("os0file:");

	switch (err) {
	case ENOSPC:
		return(OS_FILE_DISK_FULL);
#ifdef POSIX_ASYNC_IO
	case EAGAIN:
		return(OS_FILE_AIO_RESOURCES_RESERVED);
#endif
	case ENOENT:
		return(OS_FILE_NOT_FOUND);
	case EEXIST:
		return(OS_FILE_ALREADY_EXISTS);
	default:
		return(100 + err);
	}
#endif
}
/********************************************************************
Does error handling when a file operation fails. If we have run out
of disk space, the user is asked on the standard input whether the
operation should be retried after the disk has been cleaned. If aio
resources are reserved, a retry is requested right away. Any other error
ends in ut_error. */
static
bool
os_file_handle_error(
/*=================*/
				/* out: TRUE if we should retry the
				operation */
	os_file_t	file,	/* in: file pointer; not used by this
				function */
	char*		name)	/* in: name of a file or NULL */
{
	int	input_char;
	ulint	err;

	err = os_file_get_last_error();

	if (err == OS_FILE_DISK_FULL) {

		for (;;) {
			printf("\n");

			if (name) {
				printf(
			"Innobase encountered a problem with file %s.\n",
					name);
			}

			printf(
			"Disk is full. Try to clean the disk to free space\n");
			printf(
			"before answering the following: How to continue?\n");
			printf("(Y == freed some space: try again)\n");
			printf("(N == crash the database: will restart it)?\n");

			/* Skip newlines without repeating the question */
			do {
				input_char = getchar();
			} while (input_char == (int) '\n');

			/* End of input can never turn into an answer: handle
			it like 'N' instead of repeating the question
			forever */
			if (input_char == (int) 'N' || input_char == EOF) {
				ut_error;

				return(FALSE);
			}

			if (input_char == (int) 'Y') {

				return(TRUE);
			}

			/* Any other character: ask again */
		}

	} else if (err == OS_FILE_AIO_RESOURCES_RESERVED) {

		return(TRUE);
	} else {
		ut_error;
	}

	return(FALSE);
}
/********************************************************************
Opens an existing file or creates a new. */
os_file_t
os_file_create
(
/*===========*/
/* out, own: handle to the file, not defined if error,
error number can be retrieved with os_get_last_error */
char
*
name
,
/* in: name of the file or path as a null-terminated
string */
ulint
create_mode
,
/* in: OS_FILE_OPEN if an existing file is opened
(if does not exist, error), or OS_FILE_CREATE if a new
file is created (if exists, error), OS_FILE_OVERWRITE
if a new is created or an old overwritten */
ulint
purpose
,
/* in: OS_FILE_AIO, if asynchronous, non-buffered i/o
is desired, OS_FILE_NORMAL, if any normal file */
bool
*
success
)
/* out: TRUE if succeed, FALSE if error */
{
#ifdef __WIN__
os_file_t
file
;
DWORD
create_flag
;
DWORD
attributes
;
bool
retry
;
try_again:
ut_a
(
name
);
if
(
create_mode
==
OS_FILE_OPEN
)
{
create_flag
=
OPEN_EXISTING
;
}
else
if
(
create_mode
==
OS_FILE_CREATE
)
{
create_flag
=
CREATE_NEW
;
}
else
if
(
create_mode
==
OS_FILE_OVERWRITE
)
{
create_flag
=
CREATE_ALWAYS
;
}
else
{
create_flag
=
0
;
ut_error
;
}
if
(
purpose
==
OS_FILE_AIO
)
{
/* use asynchronous (overlapped) io and no buffering
of writes in the OS */
attributes
=
0
;
#ifdef WIN_ASYNC_IO
if
(
os_aio_use_native_aio
)
{
attributes
=
attributes
|
FILE_FLAG_OVERLAPPED
;
}
#endif
#ifdef UNIV_NON_BUFFERED_IO
attributes
=
attributes
|
FILE_FLAG_NO_BUFFERING
;
#endif
}
else
if
(
purpose
==
OS_FILE_NORMAL
)
{
attributes
=
0
#ifdef UNIV_NON_BUFFERED_IO
|
FILE_FLAG_NO_BUFFERING
#endif
;
}
else
{
attributes
=
0
;
ut_error
;
}
file
=
CreateFile
(
name
,
GENERIC_READ
|
GENERIC_WRITE
,
/* read and write
access */
FILE_SHARE_READ
,
/* file can be read by other
processes */
NULL
,
/* default security attributes */
create_flag
,
attributes
,
NULL
);
/* no template file */
if
(
file
==
INVALID_HANDLE_VALUE
)
{
*
success
=
FALSE
;
if
(
create_mode
!=
OS_FILE_OPEN
&&
os_file_get_last_error
()
==
OS_FILE_DISK_FULL
)
{
retry
=
os_file_handle_error
(
file
,
name
);
if
(
retry
)
{
goto
try_again
;
}
}
}
else
{
*
success
=
TRUE
;
}
return
(
file
);
#else
os_file_t
file
;
int
create_flag
;
bool
retry
;
try_again:
ut_a
(
name
);
if
(
create_mode
==
OS_FILE_OPEN
)
{
create_flag
=
O_RDWR
;
}
else
if
(
create_mode
==
OS_FILE_CREATE
)
{
create_flag
=
O_RDWR
|
O_CREAT
|
O_EXCL
;
}
else
if
(
create_mode
==
OS_FILE_OVERWRITE
)
{
create_flag
=
O_RDWR
|
O_CREAT
|
O_TRUNC
;
}
else
{
create_flag
=
0
;
ut_error
;
}
UT_NOT_USED
(
purpose
);
if
(
create_mode
==
OS_FILE_CREATE
)
{
file
=
open
(
name
,
create_flag
,
S_IRWXU
|
S_IRWXG
|
S_IRWXO
);
}
else
{
file
=
open
(
name
,
create_flag
);
}
if
(
file
==
-
1
)
{
*
success
=
FALSE
;
printf
(
"Error in opening file %s, errno %lu
\n
"
,
name
,
(
ulint
)
errno
);
perror
(
"os0file.c:"
);
if
(
create_mode
!=
OS_FILE_OPEN
&&
errno
==
ENOSPC
)
{
retry
=
os_file_handle_error
(
file
,
name
);
if
(
retry
)
{
goto
try_again
;
}
}
}
else
{
*
success
=
TRUE
;
}
return
(
file
);
#endif
}
/***************************************************************************
Closes a file handle. In case of error, error number can be retrieved with
os_file_get_last_error. */

bool
os_file_close(
/*==========*/
				/* out: TRUE if success */
	os_file_t	file)	/* in, own: handle to a file */
{
#ifdef __WIN__
	BOOL	closed;

	ut_a(file);

	closed = CloseHandle(file);

	/* Map the Windows BOOL to TRUE / FALSE */
	return(closed ? TRUE : FALSE);
#else
	int	rc;

	rc = close(file);

	/* close() reports a failure with -1 */
	return(rc == -1 ? FALSE : TRUE);
#endif
}
/***************************************************************************
Gets a file size. In the Posix branch the size is found by seeking to the
end of the file, which moves the file position of the handle, and
*size_high is always set to 0. */

bool
os_file_get_size(
/*=============*/
				/* out: TRUE if success */
	os_file_t	file,	/* in: handle to a file */
	ulint*		size,	/* out: least significant 32 bits of file
				size */
	ulint*		size_high)/* out: most significant 32 bits of size */
{
#ifdef __WIN__
	DWORD	high;
	DWORD	low;

	low = GetFileSize(file, &high);

	/* 0xFFFFFFFF is also a valid low part of a size: it means an error
	only if the last error is set, too */
	if ((low == 0xFFFFFFFF) && (GetLastError() != NO_ERROR)) {

		return(FALSE);
	}

	*size = low;
	*size_high = high;

	return(TRUE);
#else
	off_t	end_offset;

	end_offset = lseek(file, 0, SEEK_END);

	if (end_offset == (off_t) -1) {
		/* The seek failed: do not report (ulint) -1 as the size;
		the out parameters are left untouched */

		return(FALSE);
	}

	*size = (ulint) end_offset;
	*size_high = 0;

	return(TRUE);
#endif
}
/***************************************************************************
Sets a file size. This function can be used to extend or truncate a file.
In the Windows branch the end of the file is set at the given offset. In
the Posix branch zeros are written from the file start up to the given
size; nothing is truncated there, and size_high is taken into account only
if UNIV_WORD_SIZE is 8. On failure os_file_handle_error() decides whether
the operation is retried. */

bool
os_file_set_size(
/*=============*/
				/* out: TRUE if success */
	char*		name,	/* in: name of the file or path as a
				null-terminated string */
	os_file_t	file,	/* in: handle to a file */
	ulint		size,	/* in: least significant 32 bits of file
				size */
	ulint		size_high)/* in: most significant 32 bits of size */
{
#ifdef __WIN__
	DWORD	high;
	DWORD	low;
	DWORD	ret;
	BOOL	ret2;
	DWORD	err;
	bool	retry;

try_again:
	low = size;
	high = size_high;

	ret = SetFilePointer(file, low, &high, FILE_BEGIN);

	if (ret == 0xFFFFFFFF && GetLastError() != NO_ERROR) {
		err = GetLastError();

		goto error_handling;
	}

	ret2 = SetEndOfFile(file);

	if (ret2) {
		ret2 = os_file_flush(file);
	}

	if (ret2) {
		return(TRUE);
	}
#else
	ulint	offset;
	ulint	n_bytes;
	ulint	low;
	ssize_t	ret;
	bool	retry;
	ulint	i;
	byte	buf[UNIV_PAGE_SIZE * 8];

	/* Write buffer full of zeros */
	for (i = 0; i < UNIV_PAGE_SIZE * 8; i++) {
		buf[i] = '\0';
	}

	/* Start writing from the beginning of the file; offset used to be
	read in the loop below without ever being initialized. It is set
	before the retry label, so a retry resumes at the first chunk that
	failed. */
	offset = 0;

try_again:
	low = size;
#if (UNIV_WORD_SIZE == 8)
	low = low + (size_high << 32);
#endif
	while (offset < low) {
		/* Write at most one buffer at a time */
		if (low - offset < UNIV_PAGE_SIZE * 8) {
			n_bytes = low - offset;
		} else {
			n_bytes = UNIV_PAGE_SIZE * 8;
		}

		ret = pwrite(file, buf, n_bytes, offset);

		/* Test the sign first so that -1 is never compared as an
		unsigned number */
		if (ret < 0 || (ulint) ret != n_bytes) {
			goto error_handling;
		}

		offset += n_bytes;
	}

	ret = os_file_flush(file);

	if (ret) {
		return(TRUE);
	}
#endif

error_handling:
	retry = os_file_handle_error(file, name);

	if (retry) {
		goto try_again;
	}

	ut_error;

	/* Every path of a function returning a value needs a return */
	return(FALSE);
}
/***************************************************************************
Flushes the write buffers of a given file to the disk. */

bool
os_file_flush(
/*==========*/
				/* out: TRUE if success */
	os_file_t	file)	/* in, own: handle to a file */
{
#ifdef __WIN__
	BOOL	flushed;

	ut_a(file);

	flushed = FlushFileBuffers(file);

	/* Map the Windows BOOL to TRUE / FALSE */
	return(flushed ? TRUE : FALSE);
#else
	int	rc;

	rc = fsync(file);

	/* fsync() reports success with 0 */
	return(rc == 0 ? TRUE : FALSE);
#endif
}
#ifndef __WIN__
/***********************************************************************
Does a synchronous read operation in Posix. If HAVE_PREAD is defined,
pread is called directly; otherwise an lseek + read pair is run under one
of the file seek mutexes. */
static
ssize_t
os_file_pread(
/*==========*/
				/* out: number of bytes read, -1 if error */
	os_file_t	file,	/* in: handle to a file */
	void*		buf,	/* in: buffer where to read */
	ulint		n,	/* in: number of bytes to read */
	ulint		offset)	/* in: offset from where to read */
{
#ifdef HAVE_PREAD
	return(pread(file, buf, n, (off_t) offset));
#else
	ssize_t	result;
	ulint	mutex_no;

	/* Protect the seek / read operation with a mutex picked by the
	file handle */
	mutex_no = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;

	os_mutex_enter(os_file_seek_mutexes[mutex_no]);

	result = lseek(file, (off_t) offset, 0);

	if (result >= 0) {
		/* The seek succeeded: do the read itself. Otherwise the
		negative seek result is what gets returned. */
		result = read(file, buf, n);
	}

	os_mutex_exit(os_file_seek_mutexes[mutex_no]);

	return(result);
#endif
}
/***********************************************************************
Does a synchronous write operation in Posix. If HAVE_PWRITE is defined,
pwrite is called directly; otherwise an lseek + write pair is run under
one of the file seek mutexes. */
static
ssize_t
os_file_pwrite(
/*===========*/
				/* out: number of bytes written, -1 if error */
	os_file_t	file,	/* in: handle to a file */
	void*		buf,	/* in: buffer from where to write */
	ulint		n,	/* in: number of bytes to write */
	ulint		offset)	/* in: offset where to write */
{
#ifdef HAVE_PWRITE
	return(pwrite(file, buf, n, (off_t) offset));
#else
	ssize_t	result;
	ulint	mutex_no;

	/* Protect the seek / write operation with a mutex picked by the
	file handle */
	mutex_no = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;

	os_mutex_enter(os_file_seek_mutexes[mutex_no]);

	result = lseek(file, (off_t) offset, 0);

	if (result >= 0) {
		/* The seek succeeded: do the write itself. Otherwise the
		negative seek result is what gets returned. */
		result = write(file, buf, n);
	}

	os_mutex_exit(os_file_seek_mutexes[mutex_no]);

	return(result);
#endif
}
#endif
/***********************************************************************
Requests a synchronous positioned read operation. If the read fails,
os_file_handle_error() decides whether it is retried; if not, the function
ends in ut_error. In the Posix branch offset_high is taken into account
only if UNIV_WORD_SIZE is 8. */

bool
os_file_read(
/*=========*/
				/* out: TRUE if request was
				successful, FALSE if fail */
	os_file_t	file,	/* in: handle to a file */
	void*		buf,	/* in: buffer where to read */
	ulint		offset,	/* in: least significant 32 bits of file
				offset where to read */
	ulint		offset_high, /* in: most significant 32 bits of
				offset */
	ulint		n)	/* in: number of bytes to read */
{
#ifdef __WIN__
	BOOL		ret;
	DWORD		len;
	DWORD		ret2;
	DWORD		err;
	DWORD		low;
	DWORD		high;
	bool		retry;
	ulint		i;

try_again:
	ut_ad(file);
	ut_ad(buf);
	ut_ad(n > 0);

	low = offset;
	high = offset_high;

	/* Protect the seek / read operation with a mutex */
	i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;

	os_mutex_enter(os_file_seek_mutexes[i]);

	ret2 = SetFilePointer(file, low, &high, FILE_BEGIN);

	if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) {
		err = GetLastError();

		os_mutex_exit(os_file_seek_mutexes[i]);

		goto error_handling;
	}

	ret = ReadFile(file, buf, n, &len, NULL);

	os_mutex_exit(os_file_seek_mutexes[i]);

	if (ret && len == n) {
		return(TRUE);
	}

	err = GetLastError();
#else
	bool	retry;
	ssize_t	ret;

#if (UNIV_WORD_SIZE == 8)
	offset = offset + (offset_high << 32);
#endif
try_again:
	/* No seek mutex is taken here. os_file_pread() either calls the
	atomic pread or protects its own lseek + read pair with the same
	mutex. Taking the mutex here as well locked it twice, and on a
	failed read it was never released, so a retry would have blocked
	on it. */
	ret = os_file_pread(file, buf, n, offset);

	/* Test the sign first so that -1 is never compared as an unsigned
	number */
	if (ret >= 0 && (ulint) ret == n) {

		return(TRUE);
	}
#endif
error_handling:
	retry = os_file_handle_error(file, NULL);

	if (retry) {
		goto try_again;
	}

	ut_error;

	return(FALSE);
}
/***********************************************************************
Requests a synchronous write operation. If the write fails,
os_file_handle_error() decides whether it is retried; if not, the function
ends in ut_error. In the Posix branch offset_high is taken into account
only if UNIV_WORD_SIZE is 8. */

bool
os_file_write(
/*==========*/
				/* out: TRUE if request was
				successful, FALSE if fail */
	char*		name,	/* in: name of the file or path as a
				null-terminated string */
	os_file_t	file,	/* in: handle to a file */
	void*		buf,	/* in: buffer from which to write */
	ulint		offset,	/* in: least significant 32 bits of file
				offset where to write */
	ulint		offset_high, /* in: most significant 32 bits of
				offset */
	ulint		n)	/* in: number of bytes to write */
{
#ifdef __WIN__
	BOOL		ret;
	DWORD		len;
	DWORD		ret2;
	DWORD		err;
	DWORD		low;
	DWORD		high;
	bool		retry;
	ulint		i;

try_again:
	ut_ad(file);
	ut_ad(buf);
	ut_ad(n > 0);

	low = offset;
	high = offset_high;

	/* Protect the seek / write operation with a mutex */
	i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;

	os_mutex_enter(os_file_seek_mutexes[i]);

	ret2 = SetFilePointer(file, low, &high, FILE_BEGIN);

	if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) {
		err = GetLastError();

		os_mutex_exit(os_file_seek_mutexes[i]);

		goto error_handling;
	}

	ret = WriteFile(file, buf, n, &len, NULL);

	os_mutex_exit(os_file_seek_mutexes[i]);

	if (ret && len == n) {
		return(TRUE);
	}
#else
	bool	retry;
	ssize_t	ret;

#if (UNIV_WORD_SIZE == 8)
	offset = offset + (offset_high << 32);
#endif
try_again:
	/* Go through os_file_pwrite(), like os_file_read() goes through
	os_file_pread(): it falls back to a mutex protected lseek + write
	pair if HAVE_PWRITE is not defined, whereas the direct pwrite call
	used here before had no such fallback */
	ret = os_file_pwrite(file, buf, n, offset);

	/* Test the sign first so that -1 is never compared as an unsigned
	number */
	if (ret >= 0 && (ulint) ret == n) {

		return(TRUE);
	}
#endif
error_handling:
	retry = os_file_handle_error(file, name);

	if (retry) {
		goto try_again;
	}

	ut_error;

	return(FALSE);
}
/********************************************************************
Looks up a slot of an aio array by its position. Asserts that the
position lies inside the array. */
static
os_aio_slot_t*
os_aio_array_get_nth_slot(
/*======================*/
				/* out: pointer to slot */
	os_aio_array_t*	array,	/* in: aio array */
	ulint		index)	/* in: index of the slot */
{
	ut_a(index < array->n_slots);

	return(&(array->slots[index]));
}
/****************************************************************************
Allocates an aio wait array together with its mutex, its not_full event
and its n slots; every slot starts out unreserved. With Windows async i/o
each slot additionally gets an event which is also stored in array->events;
with Posix async i/o each slot gets a 'ready' event. */
static
os_aio_array_t*
os_aio_array_create(
/*================*/
				/* out, own: aio array */
	ulint	n,		/* in: maximum number of pending aio operations
				allowed; n must be divisible by n_segments */
	ulint	n_segments)	/* in: number of segments in the aio array */
{
	os_aio_array_t*	arr;
	os_aio_slot_t*	cur;
	ulint		pos;
#ifdef WIN_ASYNC_IO
	OVERLAPPED*	ovl;
#endif
	ut_a(n > 0);
	ut_a(n_segments > 0);
	ut_a(n % n_segments == 0);

	arr = ut_malloc(sizeof(os_aio_array_t));

	arr->mutex	= os_mutex_create(NULL);
	arr->not_full	= os_event_create(NULL);

	arr->n_slots	= n;
	arr->n_segments	= n_segments;
	arr->n_reserved	= 0;

	arr->slots	= ut_malloc(n * sizeof(os_aio_slot_t));
	arr->events	= ut_malloc(n * sizeof(os_event_t));

	for (pos = 0; pos < n; pos++) {
		/* pos < n == arr->n_slots, so the slot can be addressed
		directly */
		cur = &(arr->slots[pos]);

		cur->pos = pos;
		cur->reserved = FALSE;
#ifdef WIN_ASYNC_IO
		ovl = &(cur->control);

		ovl->hEvent = os_event_create(NULL);

		arr->events[pos] = ovl->hEvent;
#elif defined(POSIX_ASYNC_IO)
		cur->ready = os_event_create(NULL);
#endif
	}

	return(arr);
}
/****************************************************************************
Initializes the asynchronous i/o system. Creates five aio arrays: one for
non-ibuf reads, one for non-ibuf writes, one for ibuf i/o with a single
segment, one for log i/o with a single segment, and a synchronous aio array
of the specified size. The parameter n_segments is the combined number of
segments in the four first arrays. Also creates the file seek mutexes and
one wait event per segment. The caller must create an i/o handler thread
for each segment in the four first arrays, but not for the sync aio array. */
void
os_aio_init(
/*========*/
	ulint	n,		/* in: maximum number of pending aio operations
				allowed; n must be divisible by n_segments */
	ulint	n_segments,	/* in: combined number of segments in the four
				first aio arrays; must be >= 4 */
	ulint	n_slots_sync)	/* in: number of slots in the sync aio array */
{
	ulint	slots_per_seg;
	ulint	write_segs;
	ulint	read_segs;
	ulint	k;

	ut_ad(n % n_segments == 0);
	ut_ad(n_segments >= 4);

	slots_per_seg = n / n_segments;

	/* Two segments go to the ibuf and log arrays; the rest is divided
	between writes and reads, reads receiving the extra segment when
	the rest is odd */
	write_segs = (n_segments - 2) / 2;
	read_segs = (n_segments - 2) - write_segs;

	os_aio_read_array = os_aio_array_create(read_segs * slots_per_seg,
								read_segs);
	os_aio_write_array = os_aio_array_create(write_segs * slots_per_seg,
								write_segs);
	os_aio_ibuf_array = os_aio_array_create(slots_per_seg, 1);
	os_aio_log_array = os_aio_array_create(slots_per_seg, 1);
	os_aio_sync_array = os_aio_array_create(n_slots_sync, 1);

	os_aio_n_segments = n_segments;

#if !(defined(WIN_ASYNC_IO) || defined(POSIX_ASYNC_IO))
	/* No native aio compiled in: only simulated aio is available */
	os_aio_use_native_aio = FALSE;
#endif
	os_aio_validate();

	for (k = 0; k < OS_FILE_N_SEEK_MUTEXES; k++) {
		os_file_seek_mutexes[k] = os_mutex_create(NULL);
	}

	os_aio_segment_wait_events = ut_malloc(n_segments * sizeof(void*));

	for (k = 0; k < n_segments; k++) {
		os_aio_segment_wait_events[k] = os_event_create(NULL);
	}
}
/**************************************************************************
Maps a slot of an aio array to its global segment number: 0 for the ibuf
array, 1 for the log array, then the segments of the read array, then the
segments of the write array. */
static
ulint
os_aio_get_segment_no_from_slot(
/*============================*/
				/* out: segment number (which is the number
				used by, for example, i/o-handler threads) */
	os_aio_array_t*	array,	/* in: aio wait array */
	os_aio_slot_t*	slot)	/* in: slot in this array */
{
	ulint	slots_per_seg;

	if (array == os_aio_ibuf_array) {

		return(0);
	}

	if (array == os_aio_log_array) {

		return(1);
	}

	if (array == os_aio_read_array) {
		slots_per_seg = os_aio_read_array->n_slots
				/ os_aio_read_array->n_segments;

		return(2 + slot->pos / slots_per_seg);
	}

	/* Only the write array remains */
	ut_a(array == os_aio_write_array);

	slots_per_seg = os_aio_write_array->n_slots
			/ os_aio_write_array->n_segments;

	return(os_aio_read_array->n_segments + 2
					+ slot->pos / slots_per_seg);
}
/**************************************************************************
Translates a global segment number to the aio array it belongs to and to
the segment number inside that array. Global segment 0 is the ibuf array,
1 the log array, the following os_aio_read_array->n_segments numbers are
the read array and the remaining ones the write array. */
static
ulint
os_aio_get_array_and_local_segment(
/*===============================*/
					/* out: local segment number within
					the aio array */
	os_aio_array_t** array,		/* out: aio wait array */
	ulint		 global_segment)/* in: global segment number */
{
	ulint	first_write_seg;

	ut_a(global_segment < os_aio_n_segments);

	if (global_segment == 0) {
		*array = os_aio_ibuf_array;

		return(0);
	}

	if (global_segment == 1) {
		*array = os_aio_log_array;

		return(0);
	}

	/* Global number of the first segment of the write array */
	first_write_seg = os_aio_read_array->n_segments + 2;

	if (global_segment < first_write_seg) {
		*array = os_aio_read_array;

		return(global_segment - 2);
	}

	*array = os_aio_write_array;

	return(global_segment - first_write_seg);
}
/***********************************************************************
Returns the integer which designates the given aio array: 0 for ibuf,
1 for log, 2 for read, 3 for write. This is used to give numbers to
signals in Posix aio. Any other array fails an assertion. */
static
ulint
os_aio_get_array_no(
/*================*/
	os_aio_array_t*	array)	/* in: aio array */
{
	if (array == os_aio_ibuf_array) {

		return(0);
	}

	if (array == os_aio_log_array) {

		return(1);
	}

	if (array == os_aio_read_array) {

		return(2);
	}

	if (array == os_aio_write_array) {

		return(3);
	}

	ut_a(0);

	return(0);
}
/***********************************************************************
Returns the aio array designated by a number: 0 is ibuf, 1 log, 2 read
and 3 write. Any other number fails an assertion. */
static
os_aio_array_t*
os_aio_get_array_from_no(
/*=====================*/
			/* out: aio array */
	ulint	n)	/* in: array number */
{
	switch (n) {
	case 0:
		return(os_aio_ibuf_array);
	case 1:
		return(os_aio_log_array);
	case 2:
		return(os_aio_read_array);
	case 3:
		return(os_aio_write_array);
	default:
		break;
	}

	ut_a(0);

	return(NULL);
}
/***********************************************************************
Requests for a slot in the aio array. If no slot is available, waits until
not_full-event becomes signaled. The first unreserved slot is marked
reserved and filled in with the request data; with native aio the
platform control block of the slot (OVERLAPPED or struct aiocb) is
prepared as well. */
static
os_aio_slot_t*
os_aio_array_reserve_slot(
/*======================*/
				/* out: pointer to slot */
	ulint		type,	/* in: OS_FILE_READ or OS_FILE_WRITE */
	os_aio_array_t*	array,	/* in: aio array */
	void*		message1,/* in: message to be passed along with
				the aio operation */
	void*		message2,/* in: message to be passed along with
				the aio operation */
	os_file_t	file,	/* in: file handle */
	char*		name,	/* in: name of the file or path as a
				null-terminated string */
	void*		buf,	/* in: buffer where to read or from which
				to write */
	ulint		offset,	/* in: least significant 32 bits of file
				offset */
	ulint		offset_high, /* in: most significant 32 bits of
				offset */
	ulint		len)	/* in: length of the block to read or write */
{
	os_aio_slot_t*	slot;
#ifdef WIN_ASYNC_IO
	OVERLAPPED*	control;
#elif defined(POSIX_ASYNC_IO)
	/* NOTE: no local named 'type' may be declared here: it would
	collide with the parameter whose value is stored in the slot below */
	struct aiocb*	control;
#endif
	ulint		i;
loop:
	os_mutex_enter(array->mutex);

	if (array->n_reserved == array->n_slots) {
		os_mutex_exit(array->mutex);

		/* All slots are in use: wait for a slot to be freed and
		then check again */
		os_event_wait(array->not_full);

		goto loop;
	}

	/* n_reserved < n_slots, so the search is expected to find a free
	slot; os_aio_array_get_nth_slot asserts if i runs past the array */
	for (i = 0;; i++) {
		slot = os_aio_array_get_nth_slot(array, i);

		if (slot->reserved == FALSE) {
			break;
		}
	}

	array->n_reserved++;

	if (array->n_reserved == array->n_slots) {
		/* This was the last free slot: later reservers must wait */
		os_event_reset(array->not_full);
	}

	slot->reserved = TRUE;
	slot->message1 = message1;
	slot->message2 = message2;
	slot->file     = file;
	slot->name     = name;
	slot->len      = len;
	slot->type     = type;
	slot->buf      = buf;
	slot->offset   = offset;
	slot->offset_high = offset_high;
	slot->io_already_done = FALSE;

#ifdef WIN_ASYNC_IO
	control = &(slot->control);
	control->Offset = (DWORD)offset;
	control->OffsetHigh = (DWORD)offset_high;
	os_event_reset(control->hEvent);

#elif defined(POSIX_ASYNC_IO)

#if (UNIV_WORD_SIZE == 8)
	offset = offset + (offset_high << 32);
#else
	ut_a(offset_high == 0);
#endif
	control = &(slot->control);
	control->aio_fildes = file;
	control->aio_buf = buf;
	control->aio_nbytes = len;
	control->aio_offset = offset;
	control->aio_reqprio = 0;
	control->aio_sigevent.sigev_notify = SIGEV_SIGNAL;
	control->aio_sigevent.sigev_signo =
			SIGRTMAX + 1 + os_aio_get_array_no(array);
			/* TODO: How to choose the signal numbers? */
	control->aio_sigevent.sigev_value.sival_ptr = slot;
#endif
	os_mutex_exit(array->mutex);

	return(slot);
}
/***********************************************************************
Marks a slot of the aio array unreserved again. If the array was full
before the call, the not_full event is set. */
static
void
os_aio_array_free_slot(
/*===================*/
	os_aio_array_t*	array,	/* in: aio array */
	os_aio_slot_t*	slot)	/* in: pointer to slot */
{
	bool	was_full;

	ut_ad(array);
	ut_ad(slot);

	os_mutex_enter(array->mutex);

	ut_ad(slot->reserved);

	was_full = (array->n_reserved == array->n_slots);

	slot->reserved = FALSE;
	array->n_reserved--;

	if (was_full) {
		/* There is room again for a waiting reserver */
		os_event_set(array->not_full);
	}

#ifdef WIN_ASYNC_IO
	os_event_reset(slot->control.hEvent);
#endif
	os_mutex_exit(array->mutex);
}
/**************************************************************************
Sets the wait event of a simulated aio segment if at least one slot of
that segment is reserved, so that a handler thread waiting on the event
wakes up. */
static
void
os_aio_simulated_wake_handler_thread(
/*=================================*/
	ulint	global_segment)	/* in: the number of the segment in the aio
				arrays */
{
	os_aio_array_t*	array;
	ulint		local_seg;
	ulint		seg_len;
	ulint		k;
	bool		pending	= FALSE;

	ut_ad(!os_aio_use_native_aio);

	local_seg = os_aio_get_array_and_local_segment(&array,
							global_segment);

	seg_len = array->n_slots / array->n_segments;

	/* Scan the seg_len slots which belong to this segment */
	os_mutex_enter(array->mutex);

	for (k = 0; k < seg_len; k++) {

		if (os_aio_array_get_nth_slot(array,
				k + local_seg * seg_len)->reserved) {
			/* Found an i/o request */
			pending = TRUE;

			break;
		}
	}

	os_mutex_exit(array->mutex);

	if (pending) {
		os_event_set(os_aio_segment_wait_events[global_segment]);
	}
}
/**************************************************************************
Goes through all aio segments and wakes the simulated aio handler of each
segment which has a reserved slot. Does nothing when native aio is in
use. */
void
os_aio_simulated_wake_handler_threads(void)
/*=======================================*/
{
	ulint	seg;

	if (!os_aio_use_native_aio) {

		for (seg = 0; seg < os_aio_n_segments; seg++) {
			os_aio_simulated_wake_handler_thread(seg);
		}
	}
}
/***********************************************************************
Requests an asynchronous i/o operation. A request in mode OS_AIO_SYNC is
performed directly with os_file_read or os_file_write, except when Windows
native aio is in use. Otherwise a slot is reserved in the aio array
selected by mode, and the request is either handed to the native aio
interface or left in the slot for a simulated aio handler thread. If
queuing fails, the slot is freed and os_file_handle_error decides whether
the request is tried again. */
bool
os_aio(
/*===*/
				/* out: TRUE if request was queued
				successfully, FALSE if fail */
	ulint		type,	/* in: OS_FILE_READ or OS_FILE_WRITE */
	ulint		mode,	/* in: OS_AIO_NORMAL, ..., possibly ORed
				to OS_AIO_SIMULATED_WAKE_LATER: the
				last flag advises this function not to wake
				i/o-handler threads, but the caller will
				do the waking explicitly later, in this
				way the caller can post several requests in
				a batch; NOTE that the batch must not be
				so big that it exhausts the slots in aio
				arrays! NOTE that a simulated batch
				may introduce hidden chances of deadlocks,
				because i/os are not actually handled until
				all have been posted: use with great
				caution! */
	char*		name,	/* in: name of the file or path as a
				null-terminated string */
	os_file_t	file,	/* in: handle to a file */
	void*		buf,	/* in: buffer where to read or from which
				to write */
	ulint		offset,	/* in: least significant 32 bits of file
				offset where to read or write */
	ulint		offset_high, /* in: most significant 32 bits of
				offset */
	ulint		n,	/* in: number of bytes to read or write */
	void*		message1,/* in: messages for the aio handler (these
				can be used to identify a completed aio
				operation); if mode is OS_AIO_SYNC, these
				are ignored */
	void*		message2)
{
	os_aio_array_t*	array;
	os_aio_slot_t*	slot;
#ifdef WIN_ASYNC_IO
	BOOL		ret		= TRUE;
	DWORD		len		= n;
	void*		dummy_mess1;
	void*		dummy_mess2;
#endif
	ulint		err		= 0;
	bool		retry;
	ulint		wake_later;

	ut_ad(file);
	ut_ad(buf);
	ut_ad(n > 0);
	ut_ad(n % OS_FILE_LOG_BLOCK_SIZE == 0);
	ut_ad((ulint)buf % OS_FILE_LOG_BLOCK_SIZE == 0);
	ut_ad(offset % OS_FILE_LOG_BLOCK_SIZE == 0);
	ut_ad(os_aio_validate());

	/* Separate the wake-later flag from the actual mode */
	wake_later = mode & OS_AIO_SIMULATED_WAKE_LATER;
	mode = mode & (~OS_AIO_SIMULATED_WAKE_LATER);

	if (mode == OS_AIO_SYNC
#ifdef WIN_ASYNC_IO
	    && !os_aio_use_native_aio
#endif
	    ) {
		/* This is actually an ordinary synchronous read or write:
		no need to use an i/o-handler thread. NOTE that if we use
		Windows async i/o, Windows does not allow us to use
		ordinary synchronous os_file_read etc. on the same file,
		therefore we have built a special mechanism for synchronous
		wait in the Windows case. */

		if (type == OS_FILE_READ) {
			return(os_file_read(file, buf, offset, offset_high,
									n));
		}

		ut_a(type == OS_FILE_WRITE);

		return(os_file_write(name, file, buf, offset, offset_high,
									n));
	}

try_again:
	if (mode == OS_AIO_NORMAL) {
		if (type == OS_FILE_READ) {
			array = os_aio_read_array;
		} else {
			array = os_aio_write_array;
		}
	} else if (mode == OS_AIO_IBUF) {
		ut_ad(type == OS_FILE_READ);

		array = os_aio_ibuf_array;
	} else if (mode == OS_AIO_LOG) {

		array = os_aio_log_array;
	} else if (mode == OS_AIO_SYNC) {
		array = os_aio_sync_array;
	} else {
		ut_error;
	}

	slot = os_aio_array_reserve_slot(type, array, message1, message2,
					file, name, buf, offset,
					offset_high, n);
	if (type == OS_FILE_READ) {
		if (os_aio_use_native_aio) {
#ifdef WIN_ASYNC_IO
			ret = ReadFile(file, buf, (DWORD)n, &len,
							&(slot->control));
#elif defined(POSIX_ASYNC_IO)
			err = (ulint) aio_read(&(slot->control));
#endif
		} else {
			if (!wake_later) {
				os_aio_simulated_wake_handler_thread(
				 os_aio_get_segment_no_from_slot(array, slot));
			}
		}
	} else if (type == OS_FILE_WRITE) {
		if (os_aio_use_native_aio) {
#ifdef WIN_ASYNC_IO
			ret = WriteFile(file, buf, (DWORD)n, &len,
							&(slot->control));
#elif defined(POSIX_ASYNC_IO)
			err = (ulint) aio_write(&(slot->control));
#endif
		} else {
			if (!wake_later) {
				os_aio_simulated_wake_handler_thread(
				 os_aio_get_segment_no_from_slot(array, slot));
			}
		}
	} else {
		ut_error;
	}

#ifdef WIN_ASYNC_IO
	/* With simulated aio ret and len still hold their initial values
	TRUE and n, so the request counts as queued */
	if ((ret && len == n)
	    || (!ret && GetLastError() == ERROR_IO_PENDING)) {

		/* aio was queued successfully! */

		if (mode == OS_AIO_SYNC) {
			/* We want a synchronous i/o operation on a file
			where we also use async i/o: in Windows we must
			use the same wait mechanism as for async i/o */

			return(os_aio_windows_handle(ULINT_UNDEFINED,
						slot->pos,
						&dummy_mess1, &dummy_mess2));
		}

		return(TRUE);
	}
#else
	if (err == 0) {
		/* aio was queued successfully! */

		return(TRUE);
	}
#endif
	os_aio_array_free_slot(array, slot);

	retry = os_file_handle_error(file, name);

	if (retry) {

		goto try_again;
	}

	ut_error;

	return(FALSE);
}
#ifdef WIN_ASYNC_IO
/**************************************************************************
This function is only used in Windows asynchronous i/o.
Waits for an aio operation to complete: either for any slot event of the
given segment or, in sync aio, for the event of the slot at position pos.
The result of the completed request is fetched with GetOverlappedResult.
NOTE: this function will also take care of freeing the aio slot,
therefore no other thread is allowed to do the freeing! */
bool
os_aio_windows_handle(
/*==================*/
				/* out: TRUE if the aio operation succeeded */
	ulint	segment,	/* in: the number of the segment in the aio
				arrays to wait for; segment 0 is the ibuf
				i/o thread, segment 1 the log i/o thread,
				then follow the non-ibuf read threads, and as
				the last are the non-ibuf write threads; if
				this is ULINT_UNDEFINED, then it means that
				sync aio is used, and this parameter is
				ignored */
	ulint	pos,		/* this parameter is used only in sync aio:
				wait for the aio slot at this position */
	void**	message1,	/* out: the messages passed with the aio
				request; note that also in the case where
				the aio operation failed, these output
				parameters are valid and can be used to
				restart the operation, for example */
	void**	message2)
{
	os_aio_array_t*	array;
	os_aio_slot_t*	slot;
	ulint		seg_len;
	ulint		idx;
	bool		ret_val;
	ulint		err;
	BOOL		ok;
	DWORD		n_bytes;

	if (segment != ULINT_UNDEFINED) {
		segment = os_aio_get_array_and_local_segment(&array, segment);
	} else {
		array = os_aio_sync_array;
		segment = 0;
	}

	/* NOTE! We only access constant fields in os_aio_array. Therefore
	we do not have to acquire the protecting mutex yet */

	ut_ad(os_aio_validate());
	ut_ad(segment < array->n_segments);

	seg_len = array->n_slots / array->n_segments;

	if (array != os_aio_sync_array) {
		/* Wait for any of the slot events of this segment */
		idx = os_event_wait_multiple(seg_len,
				&(array->events[segment * seg_len]));
	} else {
		ut_ad(pos < array->n_slots);

		os_event_wait(array->events[pos]);

		idx = pos;
	}

	os_mutex_enter(array->mutex);

	slot = os_aio_array_get_nth_slot(array, idx + segment * seg_len);

	ut_a(slot->reserved);

	ok = GetOverlappedResult(slot->file, &(slot->control), &n_bytes,
									TRUE);
	*message1 = slot->message1;
	*message2 = slot->message2;

	if (!ok || n_bytes != slot->len) {
		err = GetLastError();

		ut_error;

		ret_val = FALSE;
	} else {
		ret_val = TRUE;
	}

	os_mutex_exit(array->mutex);

	os_aio_array_free_slot(array, slot);

	return(ret_val);
}
#endif
#ifdef POSIX_ASYNC_IO
/**************************************************************************
This function is only used in Posix asynchronous i/o. Waits with
sigwaitinfo for the signal assigned to the given aio array, takes the
slot pointer from the value delivered with the signal, returns the
messages of the slot and frees the slot. */
bool
os_aio_posix_handle(
/*================*/
				/* out: TRUE if the aio operation succeeded */
	ulint	array_no,	/* in: array number 0 - 3 */
	void**	message1,	/* out: the messages passed with the aio
				request; note that also in the case where
				the aio operation failed, these output
				parameters are valid and can be used to
				restart the operation, for example */
	void**	message2)
{
	os_aio_array_t*	array;
	os_aio_slot_t*	slot;
	siginfo_t	info;
	sigset_t	sigset;
	int		ret;

	/* Wait only for the signal which os_aio_array_reserve_slot assigns
	to this array */
	sigemptyset(&sigset);
	sigaddset(&sigset, SIGRTMAX + 1 + array_no);

	ret = sigwaitinfo(&sigset, &info);

	if (ret != SIGRTMAX + 1 + array_no) {

		ut_a(0);

		return(FALSE);
	}

	array = os_aio_get_array_from_no(array_no);

	os_mutex_enter(array->mutex);

	/* The slot pointer was stored in sigev_value.sival_ptr when the
	request was queued; it arrives in the siginfo filled in above */
	slot = info.si_value.sival_ptr;

	ut_a(slot->reserved);

	*message1 = slot->message1;
	*message2 = slot->message2;

	os_mutex_exit(array->mutex);

	os_aio_array_free_slot(array, slot);

	return(TRUE);
}
#endif
/**************************************************************************
Does simulated aio. This function should be called by an i/o-handler
thread. It first looks in its segment for a slot whose i/o has already
been done by an earlier call and, if there is one, returns the messages
of that slot. Otherwise it takes a reserved slot, collects up to
OS_AIO_MERGE_N_CONSECUTIVE requests which continue each other in the same
file and have the same type, performs them as one synchronous read or
write, marks all of them done, and returns the messages of the first one.
If the segment has no reserved slot, it waits on the wait event of the
segment and starts over. */
bool
os_aio_simulated_handle(
/*====================*/
				/* out: TRUE if the aio operation succeeded */
	ulint	global_segment,	/* in: the number of the segment in the aio
				arrays to wait for; segment 0 is the ibuf
				i/o thread, segment 1 the log i/o thread,
				then follow the non-ibuf read threads, and as
				the last are the non-ibuf write threads */
	void**	message1,	/* out: the messages passed with the aio
				request; note that also in the case where
				the aio operation failed, these output
				parameters are valid and can be used to
				restart the operation, for example */
	void**	message2)
{
	os_aio_array_t*	array;
	ulint		segment;
	os_aio_slot_t*	slot;
	os_aio_slot_t*	slot2;
	os_aio_slot_t*	consecutive_ios[OS_AIO_MERGE_N_CONSECUTIVE];
	ulint		n_consecutive;
	ulint		total_len;
	ulint		offs;
	byte*		combined_buf;
	bool		ret;
	ulint		n;
	ulint		i;

	segment = os_aio_get_array_and_local_segment(&array, global_segment);

restart:
	/* NOTE! We only access constant fields in os_aio_array. Therefore
	we do not have to acquire the protecting mutex yet */

	ut_ad(os_aio_validate());
	ut_ad(segment < array->n_segments);

	n = array->n_slots / array->n_segments;

	/* Look through n slots after the segment * n'th slot */

	os_mutex_enter(array->mutex);

	/* Check if there is a slot for which the i/o has already been
	done */

	for (i = 0; i < n; i++) {
		slot = os_aio_array_get_nth_slot(array, i + segment * n);

		if (slot->reserved && slot->io_already_done) {
			/* A slot is marked done only after the merged i/o
			passed the ut_a(ret) check below, so the operation
			is known to have succeeded; without this assignment
			ret would be returned uninitialized */
			ret = TRUE;

			goto slot_io_done;
		}
	}

	n_consecutive = 0;

	for (i = 0; i < n; i++) {
		slot = os_aio_array_get_nth_slot(array, i + segment * n);

		if (slot->reserved) {
			/* Found an i/o request */
			consecutive_ios[n_consecutive] = slot;
			n_consecutive++;

			break;
		}
	}

	if (n_consecutive == 0) {

		/* No i/o requested at the moment: no slot is reserved, so
		there is nothing to merge either */

		goto wait_for_io;
	}

	/* Check if there are several consecutive blocks to read or write */

consecutive_loop:
	for (i = 0; i < n; i++) {
		slot2 = os_aio_array_get_nth_slot(array, i + segment * n);

		if (slot2->reserved && slot2 != slot
		    && slot2->offset == slot->offset + slot->len
		    && slot->offset + slot->len > slot->offset /* check that
						sum does not wrap over */
		    && slot2->offset_high == slot->offset_high
		    && slot2->type == slot->type
		    && slot2->file == slot->file) {

			/* Found a consecutive i/o request */

			consecutive_ios[n_consecutive] = slot2;
			n_consecutive++;

			slot = slot2;

			if (n_consecutive < OS_AIO_MERGE_N_CONSECUTIVE) {

				/* Rescan the segment for a request which
				continues the one just added */
				goto consecutive_loop;
			} else {
				break;
			}
		}
	}

	/* We have now collected n_consecutive i/o requests in the array;
	allocate a single buffer which can hold all data, and perform the
	i/o */

	total_len = 0;
	slot = consecutive_ios[0];

	for (i = 0; i < n_consecutive; i++) {
		total_len += consecutive_ios[i]->len;
	}

	if (n_consecutive == 1) {
		/* We can use the buffer of the i/o request */
		combined_buf = slot->buf;
	} else {
		combined_buf = ut_malloc(total_len);

		ut_a(combined_buf);
	}

	/* We release the array mutex for the time of the i/o: NOTE that
	this assumes that there is just one i/o-handler thread serving
	a single segment of slots! */

	os_mutex_exit(array->mutex);

	if (slot->type == OS_FILE_WRITE && n_consecutive > 1) {
		/* Copy the buffers to the combined buffer */
		offs = 0;

		for (i = 0; i < n_consecutive; i++) {

			ut_memcpy(combined_buf + offs, consecutive_ios[i]->buf,
						consecutive_ios[i]->len);
			offs += consecutive_ios[i]->len;
		}
	}

	/* Do the i/o with ordinary, synchronous i/o functions: */
	if (slot->type == OS_FILE_WRITE) {
		ret = os_file_write(slot->name, slot->file, combined_buf,
				slot->offset, slot->offset_high, total_len);
	} else {
		ret = os_file_read(slot->file, combined_buf,
				slot->offset, slot->offset_high, total_len);
	}

	ut_a(ret);

	if (slot->type == OS_FILE_READ && n_consecutive > 1) {
		/* Copy the combined buffer to individual buffers */
		offs = 0;

		for (i = 0; i < n_consecutive; i++) {

			ut_memcpy(consecutive_ios[i]->buf, combined_buf + offs,
						consecutive_ios[i]->len);
			offs += consecutive_ios[i]->len;
		}
	}

	if (n_consecutive > 1) {
		ut_free(combined_buf);
	}

	os_mutex_enter(array->mutex);

	/* Mark the i/os done in slots */

	for (i = 0; i < n_consecutive; i++) {
		consecutive_ios[i]->io_already_done = TRUE;
	}

	/* We return the messages for the first slot now, and if there were
	several slots, the messages will be returned with subsequent calls
	of this function */

slot_io_done:

	ut_a(slot->reserved);

	*message1 = slot->message1;
	*message2 = slot->message2;

	os_mutex_exit(array->mutex);

	os_aio_array_free_slot(array, slot);

	return(ret);

wait_for_io:
	/* We wait here until there again can be i/os in the segment
	of this thread */

	os_event_reset(os_aio_segment_wait_events[global_segment]);

	os_mutex_exit(array->mutex);

	os_event_wait(os_aio_segment_wait_events[global_segment]);

	goto restart;
}
/**************************************************************************
Checks an aio array under its mutex: the array must have slots and
segments, every reserved slot must have a positive length, and the number
of reserved slots must equal array->n_reserved. A failed check fails an
assertion. */
static
bool
os_aio_array_validate(
/*==================*/
				/* out: TRUE if ok */
	os_aio_array_t*	array)	/* in: aio wait array */
{
	ulint	counted	= 0;
	ulint	k;

	ut_a(array);

	os_mutex_enter(array->mutex);

	ut_a(array->n_slots > 0);
	ut_a(array->n_segments > 0);

	for (k = 0; k < array->n_slots; k++) {
		os_aio_slot_t*	cur = os_aio_array_get_nth_slot(array, k);

		if (!cur->reserved) {

			continue;
		}

		counted++;
		ut_a(cur->len > 0);
	}

	ut_a(array->n_reserved == counted);

	os_mutex_exit(array->mutex);

	return(TRUE);
}
/**************************************************************************
Validates the consistency the aio system. */
bool
os_aio_validate
(
void
)
/*=================*/
/* out: TRUE if ok */
{
os_aio_array_validate
(
os_aio_read_array
);
os_aio_array_validate
(
os_aio_write_array
);
os_aio_array_validate
(
os_aio_ibuf_array
);
os_aio_array_validate
(
os_aio_log_array
);
os_aio_array_validate
(
os_aio_sync_array
);
return
(
TRUE
);
}
/**************************************************************************
Prints info of the aio arrays to standard output: for the read, write,
ibuf, log and sync arrays in turn, the messages of every reserved slot and
the total number of reserved slots. The same consistency checks are made
as in array validation. */
void
os_aio_print(void)
/*==============*/
{
	os_aio_array_t*	array;
	os_aio_array_t*	next;
	os_aio_slot_t*	slot;
	ulint		n_reserved;
	ulint		i;

	array = os_aio_read_array;

	for (;;) {
		ut_a(array);

		printf("INFO OF AN AIO ARRAY\n");

		os_mutex_enter(array->mutex);

		ut_a(array->n_slots > 0);
		ut_a(array->n_segments > 0);

		n_reserved = 0;

		for (i = 0; i < array->n_slots; i++) {
			slot = os_aio_array_get_nth_slot(array, i);

			if (!slot->reserved) {

				continue;
			}

			n_reserved++;
			printf("Reserved slot, messages %lx %lx\n",
					slot->message1, slot->message2);
			ut_a(slot->len > 0);
		}

		ut_a(array->n_reserved == n_reserved);

		printf("Total of %lu reserved aio slots\n", n_reserved);

		os_mutex_exit(array->mutex);

		/* Move on to the next array; the sync array is the last */
		if (array == os_aio_read_array) {
			next = os_aio_write_array;
		} else if (array == os_aio_write_array) {
			next = os_aio_ibuf_array;
		} else if (array == os_aio_ibuf_array) {
			next = os_aio_log_array;
		} else if (array == os_aio_log_array) {
			next = os_aio_sync_array;
		} else {
			break;
		}

		array = next;
	}
}
/**************************************************************************
Checks that all slots in the system have been freed, that is, there are
no pending io operations. Sums n_reserved of the read, write, ibuf, log
and sync arrays, reading each one under the mutex of its array. */
bool
os_aio_all_slots_free(void)
/*=======================*/
				/* out: TRUE if all free */
{
	os_aio_array_t*	arrays[5];
	ulint		n_res	= 0;
	ulint		k;

	arrays[0] = os_aio_read_array;
	arrays[1] = os_aio_write_array;
	arrays[2] = os_aio_ibuf_array;
	arrays[3] = os_aio_log_array;
	arrays[4] = os_aio_sync_array;

	for (k = 0; k < 5; k++) {
		os_mutex_enter(arrays[k]->mutex);

		n_res += arrays[k]->n_reserved;

		os_mutex_exit(arrays[k]->mutex);
	}

	if (n_res != 0) {

		return(FALSE);
	}

	return(TRUE);
}
innobase/srv/srv0srv.c
View file @
ec06c782
...
@@ -863,123 +863,6 @@ srv_release_max_if_no_queries(void)
...
@@ -863,123 +863,6 @@ srv_release_max_if_no_queries(void)
mutex_exit
(
&
kernel_mutex
);
mutex_exit
(
&
kernel_mutex
);
}
}
#ifdef notdefined
/***********************************************************************
Releases a recovery utility thread if no SRV_COM thread is active, the
meter of the utility exceeds its high-water mark 2 and no thread of the
utility is active yet. Runs under the kernel mutex. */
static
void
srv_release_one_if_no_queries(void)
/*===============================*/
{
	ulint	util;
	ulint	wanted;

	util = SRV_RECOVERY;
	wanted = 1;

	mutex_enter(&kernel_mutex);

	if (!(srv_n_threads_active[SRV_COM] > 0)
	    && (srv_meter[util] > srv_meter_high_water2[util])
	    && (srv_n_threads_active[util] < wanted)) {

		srv_release_threads(util,
				wanted - srv_n_threads_active[util]);

		printf("Releasing one background\n");
	}

	mutex_exit(&kernel_mutex);
}
/***********************************************************************
Subtracts n from the meter of the given utility, stopping at zero, and
suspends the calling thread if more threads of the utility are active
than srv_max_n_utilities allows. The calling thread must be an utility
thread of the type given. */
static
void
srv_decrement_meter(
/*================*/
	ulint	type,	/* in: utility type */
	ulint	n)	/* in: value to subtract from meter */
{
	ulint		allowed;
	os_event_t	event;

	mutex_enter(&kernel_mutex);

	if (srv_meter[type] >= n) {
		srv_meter[type] -= n;
	} else {
		/* Do not let the meter go below zero */
		srv_meter[type] = 0;
	}

	allowed = srv_max_n_utilities(type);

	if (!(allowed < srv_n_threads_active[type])) {

		mutex_exit(&kernel_mutex);

		return;
	}

	event = srv_suspend_thread();

	mutex_exit(&kernel_mutex);

	os_event_wait(event);
}
#endif
/*************************************************************************
Implements the server console. Reserves a console slot, waits for the
server to become operational and then reads whitespace-separated commands
from standard input: a command starting with 'c' makes a checkpoint, a
command starting with 'd' followed by a number starts disk access
simulation with that percentage, and anything else is reported as not
supported. The loop ends when no more input can be read. */
ulint
srv_console(
/*========*/
			/* out: return code, not used */
	void*	arg)	/* in: argument, not used */
{
	char	command[256];

	UT_NOT_USED(arg);

	mutex_enter(&kernel_mutex);
	srv_table_reserve_slot(SRV_CONSOLE);
	mutex_exit(&kernel_mutex);

	os_event_wait(srv_sys->operational);

	for (;;) {
		/* command holds at most 255 characters and the terminating
		null: the field width keeps scanf from writing past it */

		if (scanf("%255s", command) != 1) {
			/* End of input or a read error: command holds no
			new token, so stop instead of spinning on stale or
			uninitialized contents */

			break;
		}

		srv_inc_thread_count(SRV_CONSOLE);

		if (command[0] == 'c') {
			printf("Making checkpoint\n");

			log_make_checkpoint_at(ut_dulint_max, TRUE);

			printf("Checkpoint completed\n");

		} else if (command[0] == 'd') {
			/* The percentage follows directly after the 'd' */
			srv_sim_disk_wait_pct = atoi(command + 1);

			printf(
			"Starting disk access simulation with pct %lu\n",
			srv_sim_disk_wait_pct);
		} else {
			printf("\nNot supported!\n");
		}

		srv_dec_thread_count(SRV_CONSOLE);
	}

	return(0);
}
/*************************************************************************
/*************************************************************************
Creates the first communication endpoint for the server. This
Creates the first communication endpoint for the server. This
first call also initializes the com0com.* module. */
first call also initializes the com0com.* module. */
...
@@ -1008,69 +891,6 @@ srv_communication_init(
...
@@ -1008,69 +891,6 @@ srv_communication_init(
ut_a
(
ret
==
0
);
ut_a
(
ret
==
0
);
}
}
#ifdef notdefined
/*************************************************************************
Implements the recovery utility. Reserves a SRV_RECOVERY slot, waits for
the server to become operational and then loops: the thread count of the
utility is incremented and decremented around the (currently disabled)
recovery finish call, after which the thread suspends itself and waits to
be released. */
static
ulint
srv_recovery_thread(
/*================*/
			/* out: return code, not used */
	void*	arg)	/* in: not used */
{
	ulint		slot_no;
	os_event_t	wake_event;

	UT_NOT_USED(arg);

	slot_no = srv_table_reserve_slot(SRV_RECOVERY);

	os_event_wait(srv_sys->operational);

	while (1) {
		/* Finish a possible recovery */

		srv_inc_thread_count(SRV_RECOVERY);

		/* recv_recovery_from_checkpoint_finish(); */

		srv_dec_thread_count(SRV_RECOVERY);

		mutex_enter(&kernel_mutex);
		wake_event = srv_suspend_thread();
		mutex_exit(&kernel_mutex);

		/* Wait for somebody to release this thread; (currently, this
		should never be released) */

		os_event_wait(wake_event);
	}

	return(0);
}
/*************************************************************************
Implements the purge utility. Waits for the server to become operational
and then calls trx_purge in an endless loop. */
ulint
srv_purge_thread(
/*=============*/
			/* out: return code, not used */
	void*	arg)	/* in: not used */
{
	UT_NOT_USED(arg);

	os_event_wait(srv_sys->operational);

	while (1) {
		trx_purge();
	}

	return(0);
}
#endif
/* notdefined */
/*************************************************************************
/*************************************************************************
Creates the utility threads. */
Creates the utility threads. */
...
@@ -1100,58 +920,6 @@ srv_create_utility_threads(void)
...
@@ -1100,58 +920,6 @@ srv_create_utility_threads(void)
ut_a(thread); */
ut_a(thread); */
}
}
#ifdef notdefined
/*************************************************************************
Thread function of a communication thread. Allocates a message buffer and
an address buffer once, and then loops forever receiving a datagram from the
server endpoint and handing it to sess_process_cli_msg(). A failed receive
is an assertion failure. */
static
ulint
srv_com_thread(
/*===========*/
			/* out: return code; not used */
	void*	arg)	/* in: not used */
{
	byte*	recv_buf;
	byte*	sender_buf;
	ulint	recv_len;
	ulint	sender_len;
	ulint	err;

	UT_NOT_USED(arg);

	srv_table_reserve_slot(SRV_COM);

	os_event_wait(srv_sys->operational);

	recv_buf = mem_alloc(com_endpoint_get_max_size(srv_sys->endpoint));
	sender_buf = mem_alloc(COM_MAX_ADDR_LEN);

	while (1) {
		err = com_recvfrom(srv_sys->endpoint, recv_buf,
			com_endpoint_get_max_size(srv_sys->endpoint),
			&recv_len, (char*)sender_buf, COM_MAX_ADDR_LEN,
			&sender_len);
		ut_a(err == 0);

		srv_inc_thread_count(SRV_COM);

		sess_process_cli_msg(recv_buf, recv_len, sender_buf,
								sender_len);

		/* srv_increment_meter(SRV_RECOVERY, 1); */

		srv_dec_thread_count(SRV_COM);

		/* Release one utility thread for each utility if
		high water mark 2 is exceeded and there are no
		active queries. This is done to utilize possible
		quiet time in the server. */

		srv_release_one_if_no_queries();
	}

	return(0);
}
#endif
/*************************************************************************
/*************************************************************************
Creates the communication threads. */
Creates the communication threads. */
...
@@ -1171,53 +939,6 @@ srv_create_com_threads(void)
...
@@ -1171,53 +939,6 @@ srv_create_com_threads(void)
}
}
}
}
#ifdef notdefined
/*************************************************************************
Thread function of a worker thread. Once the server is operational the
thread repeatedly suspends itself, and each time it is released it checks
the server task queue between a thread count increment and decrement for
SRV_WORKER. */
static
ulint
srv_worker_thread(
/*==============*/
			/* out: return code, not used */
	void*	arg)	/* in: not used */
{
	os_event_t	wake_event;

	UT_NOT_USED(arg);

	srv_table_reserve_slot(SRV_WORKER);

	os_event_wait(srv_sys->operational);

	while (1) {
		/* srv_suspend_thread() is called with the kernel mutex
		held */

		mutex_enter(&kernel_mutex);
		wake_event = srv_suspend_thread();
		mutex_exit(&kernel_mutex);

		/* Sleep until somebody releases this thread */

		os_event_wait(wake_event);

		srv_inc_thread_count(SRV_WORKER);

		/* Look in the server task queue for work for this thread,
		and do it */

		srv_que_task_queue_check();

		srv_dec_thread_count(SRV_WORKER);

		/* Release one utility thread for each utility if
		high water mark 2 is exceeded and there are no
		active queries. This is done to utilize possible
		quiet time in the server. */

		srv_release_one_if_no_queries();
	}

	return(0);
}
#endif
/*************************************************************************
/*************************************************************************
Creates the worker threads. */
Creates the worker threads. */
...
@@ -1238,404 +959,6 @@ srv_create_worker_threads(void)
...
@@ -1238,404 +959,6 @@ srv_create_worker_threads(void)
}
}
}
}
#ifdef notdefined
/*************************************************************************
Reads a keyword and a value from a file. */
ulint
srv_read_init_val
(
/*==============*/
/* out: DB_SUCCESS or error code */
FILE
*
initfile
,
/* in: file pointer */
char
*
keyword
,
/* in: keyword before value(s), or NULL if
no keyword read */
char
*
str_buf
,
/* in/out: buffer for a string value to read,
buffer size must be 10000 bytes, if NULL
then not read */
ulint
*
num_val
,
/* out: numerical value to read, if NULL
then not read */
ibool
print_not_err
)
/* in: if TRUE, then we will not print
error messages to console */
{
ulint
ret
;
char
scan_buf
[
10000
];
if
(
keyword
==
NULL
)
{
goto
skip_keyword
;
}
ret
=
fscanf
(
initfile
,
"%9999s"
,
scan_buf
);
if
(
ret
==
0
||
ret
==
EOF
||
0
!=
ut_strcmp
(
scan_buf
,
keyword
))
{
if
(
print_not_err
)
{
return
(
DB_ERROR
);
}
printf
(
"Error in InnoDB booting: keyword %s not found
\n
"
,
keyword
);
printf
(
"from the initfile!
\n
"
);
return
(
DB_ERROR
);
}
skip_keyword:
if
(
num_val
==
NULL
&&
str_buf
==
NULL
)
{
return
(
DB_SUCCESS
);
}
ret
=
fscanf
(
initfile
,
"%9999s"
,
scan_buf
);
if
(
ret
==
EOF
||
ret
==
0
)
{
if
(
print_not_err
)
{
return
(
DB_ERROR
);
}
printf
(
"Error in InnoDB booting: could not read first value after %s
\n
"
,
keyword
);
printf
(
"from the initfile!
\n
"
);
return
(
DB_ERROR
);
}
if
(
str_buf
)
{
ut_memcpy
(
str_buf
,
scan_buf
,
10000
);
printf
(
"init keyword %s value %s read
\n
"
,
keyword
,
str_buf
);
if
(
!
num_val
)
{
return
(
DB_SUCCESS
);
}
ret
=
fscanf
(
initfile
,
"%9999s"
,
scan_buf
);
if
(
ret
==
EOF
||
ret
==
0
)
{
if
(
print_not_err
)
{
return
(
DB_ERROR
);
}
printf
(
"Error in InnoDB booting: could not read second value after %s
\n
"
,
keyword
);
printf
(
"from the initfile!
\n
"
);
return
(
DB_ERROR
);
}
}
if
(
ut_strlen
(
scan_buf
)
>
9
)
{
if
(
print_not_err
)
{
return
(
DB_ERROR
);
}
printf
(
"Error in InnoDB booting: numerical value too big after %s
\n
"
,
keyword
);
printf
(
"in the initfile!
\n
"
);
return
(
DB_ERROR
);
}
*
num_val
=
(
ulint
)
atoi
(
scan_buf
);
if
(
*
num_val
>=
1000000000
)
{
if
(
print_not_err
)
{
return
(
DB_ERROR
);
}
printf
(
"Error in InnoDB booting: numerical value too big after %s
\n
"
,
keyword
);
printf
(
"in the initfile!
\n
"
);
return
(
DB_ERROR
);
}
printf
(
"init keyword %s value %lu read
\n
"
,
keyword
,
*
num_val
);
return
(
DB_SUCCESS
);
}
/*************************************************************************
Makes a heap copy, allocated with ut_malloc, of a null-terminated string. */
static
char*
srv_initfile_str_dup(
/*=================*/
			/* out, own: copy of the string */
	char*	str)	/* in: null-terminated string */
{
	ulint	size;
	char*	copy;

	size = ut_strlen(str) + 1;

	copy = ut_malloc(size);
	ut_memcpy(copy, str, size);

	return(copy);
}

/*************************************************************************
Reads keywords and values from an initfile, in a fixed order, and stores
them to the srv_... configuration variables. Reading stops at the first
keyword that cannot be read; memory allocated before that point is not
freed here. The trailing SRV_RECOVER_FROM_BACKUP section is optional. */

ulint
srv_read_initfile(
/*==============*/
				/* out: DB_SUCCESS or error code */
	FILE*	initfile)	/* in: file pointer */
{
	char	str_buf[10000];
	ulint	n;
	ulint	i;
	ulint	ulint_val;
	ulint	val1;
	ulint	val2;
	ulint	err;

	err = srv_read_init_val(initfile, "INNOBASE_DATA_HOME_DIR",
						str_buf, NULL, FALSE);
	if (err != DB_SUCCESS) {

		return(err);
	}

	srv_data_home = srv_initfile_str_dup(str_buf);

	err = srv_read_init_val(initfile, "TABLESPACE_NUMBER_OF_DATA_FILES",
						NULL, &n, FALSE);
	if (err != DB_SUCCESS) {

		return(err);
	}

	srv_n_data_files = n;

	srv_data_file_names = ut_malloc(n * sizeof(char*));
	srv_data_file_sizes = ut_malloc(n * sizeof(ulint));

	for (i = 0; i < n; i++) {
		err = srv_read_init_val(initfile,
					"DATA_FILE_PATH_AND_SIZE_MB",
					str_buf, &ulint_val, FALSE);
		if (err != DB_SUCCESS) {

			return(err);
		}

		srv_data_file_names[i] = srv_initfile_str_dup(str_buf);

		/* Megabytes to pages */
		srv_data_file_sizes[i] = ulint_val
					* ((1024 * 1024) / UNIV_PAGE_SIZE);
	}

	err = srv_read_init_val(initfile, "NUMBER_OF_MIRRORED_LOG_GROUPS",
					NULL, &srv_n_log_groups, FALSE);
	if (err != DB_SUCCESS) {

		return(err);
	}

	err = srv_read_init_val(initfile, "NUMBER_OF_LOG_FILES_IN_GROUP",
					NULL, &srv_n_log_files, FALSE);
	if (err != DB_SUCCESS) {

		return(err);
	}

	err = srv_read_init_val(initfile, "LOG_FILE_SIZE_KB",
					NULL, &srv_log_file_size, FALSE);
	if (err != DB_SUCCESS) {

		return(err);
	}

	/* Kilobytes to pages */
	srv_log_file_size = srv_log_file_size / (UNIV_PAGE_SIZE / 1024);

	/* One home directory is stored per log GROUP: the array must be
	sized by the number of groups, which is also the bound of the loop
	below, not by the number of files in a group */

	srv_log_group_home_dirs = ut_malloc(srv_n_log_groups * sizeof(char*));

	for (i = 0; i < srv_n_log_groups; i++) {
		err = srv_read_init_val(initfile,
					"INNOBASE_LOG_GROUP_HOME_DIR",
					str_buf, NULL, FALSE);
		if (err != DB_SUCCESS) {

			return(err);
		}

		srv_log_group_home_dirs[i] = srv_initfile_str_dup(str_buf);
	}

	err = srv_read_init_val(initfile, "INNOBASE_LOG_ARCH_DIR",
						str_buf, NULL, FALSE);
	if (err != DB_SUCCESS) {

		return(err);
	}

	srv_arch_dir = srv_initfile_str_dup(str_buf);

	err = srv_read_init_val(initfile, "LOG_ARCHIVE_ON(1/0)",
					NULL, &srv_log_archive_on, FALSE);
	if (err != DB_SUCCESS) {

		return(err);
	}

	err = srv_read_init_val(initfile, "LOG_BUFFER_SIZE_KB",
					NULL, &srv_log_buffer_size, FALSE);
	if (err != DB_SUCCESS) {

		return(err);
	}

	/* Kilobytes to pages */
	srv_log_buffer_size = srv_log_buffer_size / (UNIV_PAGE_SIZE / 1024);

	err = srv_read_init_val(initfile, "FLUSH_LOG_AT_TRX_COMMIT(1/0)",
				NULL, &srv_flush_log_at_trx_commit, FALSE);
	if (err != DB_SUCCESS) {

		return(err);
	}

	err = srv_read_init_val(initfile, "BUFFER_POOL_SIZE_MB",
					NULL, &srv_pool_size, FALSE);
	if (err != DB_SUCCESS) {

		return(err);
	}

	/* Megabytes to pages */
	srv_pool_size = srv_pool_size * ((1024 * 1024) / UNIV_PAGE_SIZE);

	err = srv_read_init_val(initfile, "ADDITIONAL_MEM_POOL_SIZE_MB",
					NULL, &srv_mem_pool_size, FALSE);
	if (err != DB_SUCCESS) {

		return(err);
	}

	/* Megabytes to bytes */
	srv_mem_pool_size = srv_mem_pool_size * 1024 * 1024;

	srv_lock_table_size = 20 * srv_pool_size;

	err = srv_read_init_val(initfile, "NUMBER_OF_FILE_IO_THREADS",
					NULL, &srv_n_file_io_threads, FALSE);
	if (err != DB_SUCCESS) {

		return(err);
	}

	/* The rest of the file is optional: the reads below are silent
	and a failure is not an error */

	err = srv_read_init_val(initfile, "SRV_RECOVER_FROM_BACKUP",
						NULL, NULL, TRUE);
	if (err == DB_SUCCESS) {
		srv_archive_recovery = TRUE;
		srv_archive_recovery_limit_lsn = ut_dulint_max;

		/* Both halves of the limit lsn must have been read before
		they are used: do not let the second read hide a failure
		of the first one */

		err = srv_read_init_val(initfile, NULL, NULL, &val1, TRUE);

		if (err == DB_SUCCESS) {
			err = srv_read_init_val(initfile, NULL, NULL,
								&val2, TRUE);
		}

		if (err == DB_SUCCESS) {
			srv_archive_recovery_limit_lsn =
					ut_dulint_create(val1, val2);
		}
	}

	/* err = srv_read_init_val(initfile,
		"SYNC_NUMBER_OF_SPIN_WAIT_ROUNDS", NULL,
		&srv_n_spin_wait_rounds);

	err = srv_read_init_val(initfile, "SYNC_SPIN_WAIT_DELAY", NULL,
		&srv_spin_wait_delay); */

	return(DB_SUCCESS);
}
/*************************************************************************
Reads keywords and values from an initfile and stores them to the srv_...
configuration variables, also switching on the matching debug print flags.
The return values of srv_read_init_val are not inspected here.
NOTE(review): the calls below pass (file, flag, keyword, buffer, value),
which does not match the parameter order of the srv_read_init_val definition
visible earlier in this file; this looks like an older variant of the reader
and should be confirmed before this code is enabled. */

void
srv_read_initfile(
/*==============*/
	FILE*	initfile)	/* in: file pointer */
{
	char	str_buf[10000];
	ulint	ulint_val;

/* Reads one keyword and its value to var; all reads except the first one
below use the same flag and string buffer */
#define SRV_INITFILE_READ(keyword, var)\
	srv_read_init_val(initfile, TRUE, keyword, str_buf, &(var))

	srv_read_init_val(initfile, FALSE, "SRV_ENDPOINT_NAME", str_buf,
								&ulint_val);
	ut_a(ut_strlen(str_buf) < COM_MAX_ADDR_LEN);

	ut_memcpy(srv_endpoint_name, str_buf, COM_MAX_ADDR_LEN);

	SRV_INITFILE_READ("SRV_N_COM_THREADS", srv_n_com_threads);
	SRV_INITFILE_READ("SRV_N_WORKER_THREADS", srv_n_worker_threads);

	SRV_INITFILE_READ("SYNC_N_SPIN_WAIT_ROUNDS", srv_n_spin_wait_rounds);
	SRV_INITFILE_READ("SYNC_SPIN_WAIT_DELAY", srv_spin_wait_delay);

	SRV_INITFILE_READ("THREAD_PRIORITY_BOOST", srv_priority_boost);

	SRV_INITFILE_READ("N_SPACES", srv_n_spaces);
	SRV_INITFILE_READ("N_FILES", srv_n_files);
	SRV_INITFILE_READ("FILE_SIZE", srv_file_size);

	SRV_INITFILE_READ("N_LOG_GROUPS", srv_n_log_groups);
	SRV_INITFILE_READ("N_LOG_FILES", srv_n_log_files);
	SRV_INITFILE_READ("LOG_FILE_SIZE", srv_log_file_size);
	SRV_INITFILE_READ("LOG_ARCHIVE_ON", srv_log_archive_on);
	SRV_INITFILE_READ("LOG_BUFFER_SIZE", srv_log_buffer_size);
	SRV_INITFILE_READ("FLUSH_LOG_AT_TRX_COMMIT",
					srv_flush_log_at_trx_commit);

	SRV_INITFILE_READ("POOL_SIZE", srv_pool_size);
	SRV_INITFILE_READ("MEM_POOL_SIZE", srv_mem_pool_size);
	SRV_INITFILE_READ("LOCK_TABLE_SIZE", srv_lock_table_size);

	SRV_INITFILE_READ("SIM_DISK_WAIT_PCT", srv_sim_disk_wait_pct);
	SRV_INITFILE_READ("SIM_DISK_WAIT_LEN", srv_sim_disk_wait_len);
	SRV_INITFILE_READ("SIM_DISK_WAIT_BY_YIELD",
					srv_sim_disk_wait_by_yield);
	SRV_INITFILE_READ("SIM_DISK_WAIT_BY_WAIT",
					srv_sim_disk_wait_by_wait);

	SRV_INITFILE_READ("MEASURE_CONTENTION", srv_measure_contention);
	SRV_INITFILE_READ("MEASURE_BY_SPIN", srv_measure_by_spin);

	SRV_INITFILE_READ("PRINT_THREAD_RELEASES",
					srv_print_thread_releases);

	SRV_INITFILE_READ("PRINT_LOCK_WAITS", srv_print_lock_waits);
	if (srv_print_lock_waits) {
		lock_print_waits = TRUE;
	}

	SRV_INITFILE_READ("PRINT_BUF_IO", srv_print_buf_io);
	if (srv_print_buf_io) {
		buf_debug_prints = TRUE;
	}

	SRV_INITFILE_READ("PRINT_LOG_IO", srv_print_log_io);
	if (srv_print_log_io) {
		log_debug_writes = TRUE;
	}

	SRV_INITFILE_READ("PRINT_PARSED_SQL", srv_print_parsed_sql);
	if (srv_print_parsed_sql) {
		pars_print_lexed = TRUE;
	}

	SRV_INITFILE_READ("PRINT_LATCH_WAITS", srv_print_latch_waits);

	SRV_INITFILE_READ("TEST_EXTRA_MUTEXES", srv_test_extra_mutexes);
	SRV_INITFILE_READ("TEST_NOCACHE", srv_test_nocache);
	SRV_INITFILE_READ("TEST_CACHE_EVICT", srv_test_cache_evict);

	SRV_INITFILE_READ("TEST_SYNC", srv_test_sync);
	SRV_INITFILE_READ("TEST_N_THREADS", srv_test_n_threads);
	SRV_INITFILE_READ("TEST_N_LOOPS", srv_test_n_loops);
	SRV_INITFILE_READ("TEST_N_FREE_RNDS", srv_test_n_free_rnds);
	SRV_INITFILE_READ("TEST_N_RESERVED_RNDS", srv_test_n_reserved_rnds);
	SRV_INITFILE_READ("TEST_N_MUTEXES", srv_test_n_mutexes);
	SRV_INITFILE_READ("TEST_ARRAY_SIZE", srv_test_array_size);

#undef SRV_INITFILE_READ
}
#endif
/*************************************************************************
/*************************************************************************
Initializes the server. */
Initializes the server. */
...
...
innobase/trx/trx0trx.c
View file @
ec06c782
...
@@ -1147,8 +1147,6 @@ trx_sig_send(
...
@@ -1147,8 +1147,6 @@ trx_sig_send(
ut_a
(
0
);
ut_a
(
0
);
/* sess_raise_error_low(trx, 0, 0, NULL, NULL, NULL, NULL,
"Incompatible signal"); */
return
(
FALSE
);
return
(
FALSE
);
}
}
...
@@ -1197,9 +1195,6 @@ trx_sig_send(
...
@@ -1197,9 +1195,6 @@ trx_sig_send(
in the error state: */
in the error state: */
ut_a
(
0
);
ut_a
(
0
);
sess_raise_error_low
(
trx
,
0
,
0
,
NULL
,
NULL
,
NULL
,
NULL
,
(
char
*
)
"Signal from another session, or a break execution signal"
);
}
}
/* If there were no other signals ahead in the queue, try to start
/* If there were no other signals ahead in the queue, try to start
...
...
innobase/usr/usr0sess.c
View file @
ec06c782
...
@@ -28,6 +28,13 @@ Created 6/25/1996 Heikki Tuuri
...
@@ -28,6 +28,13 @@ Created 6/25/1996 Heikki Tuuri
/* The session system global data structure */
/* The session system global data structure */
sess_sys_t
*
sess_sys
=
NULL
;
sess_sys_t
*
sess_sys
=
NULL
;
/*************************************************************************
Closes a session, freeing the memory occupied by it. */
static
void
sess_close
(
/*=======*/
sess_t
*
sess
);
/* in, own: session object */
/*************************************************************************
/*************************************************************************
Communicates an error message to the client. If sess->client_waits is not
Communicates an error message to the client. If sess->client_waits is not
TRUE, puts the session to error state and does not try to send the error
TRUE, puts the session to error state and does not try to send the error
...
@@ -85,42 +92,6 @@ sess_cli_msg_set_sess(
...
@@ -85,42 +92,6 @@ sess_cli_msg_set_sess(
mach_write_to_4
(
str
+
SESS_CLI_MSG_SESS_ID_CHECK
,
fold
);
mach_write_to_4
(
str
+
SESS_CLI_MSG_SESS_ID_CHECK
,
fold
);
}
}
/*************************************************************************
Looks up the session a client message is addressed to. The message is not
trusted: it must be long enough to contain the session id check field, and
the fold of the session id read from the message must match that check
field, before the session hash table is searched. The caller must own the
kernel mutex. */
static
sess_t*
sess_cli_msg_get_sess(
/*==================*/
			/* out: session, NULL if not found */
	byte*	str,	/* in: message string */
	ulint	len)	/* in: message string length */
{
	sess_t*	found;
	ulint	id_fold;
	dulint	sess_id;

	ut_ad(mutex_own(&kernel_mutex));

	/* The 4-byte check field must fit in the message */

	if (len < SESS_CLI_MSG_SESS_ID_CHECK + 4) {

		return(NULL);
	}

	sess_id = mach_read_from_8(str + SESS_CLI_MSG_SESS_ID);
	id_fold = sess_id_fold(sess_id);

	if (mach_read_from_4(str + SESS_CLI_MSG_SESS_ID_CHECK) != id_fold) {

		return(NULL);
	}

	HASH_SEARCH(hash, sess_sys->hash, id_fold, found,
					UT_DULINT_EQ(sess_id, found->id));

	return(found);
}
/***************************************************************************
/***************************************************************************
Decrements the reference count of a session and closes it, if desired. */
Decrements the reference count of a session and closes it, if desired. */
UNIV_INLINE
UNIV_INLINE
...
@@ -311,6 +282,7 @@ sess_open(
...
@@ -311,6 +282,7 @@ sess_open(
/*************************************************************************
/*************************************************************************
Closes a session, freeing the memory occupied by it. */
Closes a session, freeing the memory occupied by it. */
static
void
void
sess_close
(
sess_close
(
/*=======*/
/*=======*/
...
@@ -595,330 +567,6 @@ sess_error_low(
...
@@ -595,330 +567,6 @@ sess_error_low(
NULL
,
NULL
,
NULL
);
NULL
,
NULL
,
NULL
);
}
}
/*************************************************************************
Raises an SQL error. */
void
sess_raise_error_low
(
/*=================*/
trx_t
*
trx
,
/* in: transaction */
ulint
err_no
,
/* in: error number */
ulint
type
,
/* in: more info of the error, or 0 */
dict_table_t
*
table
,
/* in: dictionary table or NULL */
dict_index_t
*
index
,
/* in: table index or NULL */
dtuple_t
*
tuple
,
/* in: tuple to insert or NULL */
rec_t
*
rec
,
/* in: record or NULL */
char
*
err_str
)
/* in: arbitrary null-terminated error string,
or NULL */
{
char
*
str
;
ulint
len
;
ut_ad
(
mutex_own
(
&
kernel_mutex
));
str
=
mem_alloc
(
64000
);
len
=
0
;
len
+=
sprintf
(
str
+
len
,
"Error number: %lu"
,
err_no
);
if
(
type
)
{
len
+=
sprintf
(
str
+
len
,
", type: %lu"
,
type
);
}
if
(
table
)
{
len
+=
sprintf
(
str
+
len
,
", table: %s"
,
table
->
name
);
}
if
(
index
)
{
len
+=
sprintf
(
str
+
len
,
", index: %s"
,
index
->
name
);
}
if
(
tuple
)
{
len
+=
sprintf
(
str
+
len
,
", tuple:"
);
len
+=
dtuple_sprintf
(
str
+
len
,
8192
,
tuple
);
}
if
(
rec
)
{
len
+=
sprintf
(
str
+
len
,
", record:"
);
len
+=
rec_sprintf
(
str
+
len
,
8192
,
rec
);
}
if
(
err_str
)
{
len
+=
sprintf
(
str
+
len
,
", %s"
,
err_str
);
}
str
[
len
]
=
'\0'
;
ut_a
(
len
<
64000
);
if
(
trx
->
sess
)
{
sess_error_low
(
trx
->
sess
,
err_no
,
str
);
}
else
{
mem_free
(
str
);
}
}
/***************************************************************************
Processes a client message which is part of a bigger message. The first part
announces the total size and allocates the assembly buffer; later parts
append their data section to it. Because the lengths come from the client,
every copy is checked against the announced size: a part that does not fit
is treated as a corrupted message, the partially assembled message is
discarded, and an error is raised on the session. */
static
ibool
sess_receive_msg_part(
/*==================*/
			/* TRUE if message completed */
	sess_t*	sess,	/* in: session */
	byte*	str,	/* in: message string */
	ulint	len)	/* in: message length */
{
	ulint	cont;
	ulint	data_len;

	cont = sess_cli_msg_get_continue(str);

	ut_ad(cont != SESS_MSG_SINGLE_PART);

	if (cont == SESS_MSG_FIRST_PART) {
		if (sess->big_msg) {
			/* The previous big message was never completed */

			sess_error_low(sess, SESS_ERR_MSG_LOST, NULL);

			return(FALSE);
		}

		sess->big_msg_size = 1024 * sess_cli_msg_get_cont_size(str);

		if (len > sess->big_msg_size) {
			/* The first part alone is bigger than the size the
			client announced: copying it would overrun the
			buffer */

			sess_error_low(sess, SESS_ERR_MSG_CORRUPTED, NULL);

			return(FALSE);
		}

		sess->big_msg = mem_alloc(sess->big_msg_size);

		if (sess->big_msg == NULL) {
			sess_error_low(sess, SESS_ERR_OUT_OF_MEMORY, NULL);

			return(FALSE);
		}

		ut_memcpy(sess->big_msg, str, len);
		sess->big_msg_len = len;

		return(FALSE);
	}

	/* A middle or the last part */

	if (sess->big_msg == NULL) {
		sess_error_low(sess, SESS_ERR_MSG_LOST, NULL);

		return(FALSE);
	}

	/* The part must contain a whole header, and its data section must
	fit in what is left of the assembly buffer */

	if (len < SESS_CLI_MSG_DATA
	    || len - SESS_CLI_MSG_DATA
			> sess->big_msg_size - sess->big_msg_len) {

		mem_free(sess->big_msg);
		sess->big_msg = NULL;

		sess_error_low(sess, SESS_ERR_MSG_CORRUPTED, NULL);

		return(FALSE);
	}

	data_len = len - SESS_CLI_MSG_DATA;

	ut_memcpy(sess->big_msg + sess->big_msg_len,
					str + SESS_CLI_MSG_DATA, data_len);

	sess->big_msg_len += data_len;

	if (cont == SESS_MSG_MIDDLE_PART) {

		return(FALSE);
	}

	return(TRUE);
}
/***************************************************************************
Handles a client message which requires SQL parsing, i.e., the message built
in SQLPrepare. The SQL text in the data section of the message is parsed, the
resulting query graph is added to the session with a fresh graph id, and a
success message containing the graph id and the query parameter info is sent
to the client. NOTE: the kernel mutex is released and reacquired inside this
function, both around the parsing and around writing the parameter info; the
session reference count is incremented for that time. */
static
void
sess_receive_prepare(
/*=================*/
	sess_t*	sess,	/* in: session */
	byte*	cli_msg,/* in: client message */
	ulint	len)	/* in: message length */
{
	dulint	errors_before;
	que_t*	parsed;
	byte	reply[ODBC_DATAGRAM_SIZE];

	UT_NOT_USED(len);

	ut_ad(mutex_own(&kernel_mutex));

	/* Remembered to detect an error that happens while the kernel
	mutex is not held */
	errors_before = sess->error_count;

	/* Keep the session object alive during the parsing */

	sess_refer_count_inc(sess);

	/* The command is parsed without the kernel mutex, to reduce
	contention on it */

	mutex_exit(&kernel_mutex);

/*	printf("To parse query %s\n", (char*)(cli_msg + SESS_CLI_MSG_DATA)); */

	parsed = pars_sql((char*)(cli_msg + SESS_CLI_MSG_DATA));

	mutex_enter(&kernel_mutex);

	if (parsed == NULL) {
		/* The parser failed */

		sess_error_low(sess, SESS_ERR_SQL_ERROR, NULL);
		sess_refer_count_dec(sess);

		ut_error;

		return;
	}

	if (!UT_DULINT_EQ(errors_before, sess->error_count)) {
		/* An error, or an asynchronous signal on the session
		happened while the kernel mutex was released: throw the
		graph away */

		parsed->state = QUE_FORK_INVALID;

		que_graph_try_free(parsed);
		sess_refer_count_dec(sess);

		ut_error;

		return;
	}

	UT_LIST_ADD_LAST(graphs, sess->graphs, parsed);

	parsed->id = sess->next_graph_id;
	sess->next_graph_id++;

	/* Report the successful preparation to the client, together with
	info about the possible query parameters: the message will be
	decoded in SQLPrepare */

	ut_ad(sess->client_waits);

	sess_srv_msg_init(sess, reply, SESS_SRV_SUCCESS);

	mach_write_to_4(reply + SESS_SRV_MSG_DATA, parsed->id);

	mutex_exit(&kernel_mutex);

	len = pars_write_query_param_info(reply + SESS_SRV_MSG_DATA + 4,
								parsed);
	mutex_enter(&kernel_mutex);

	sess_srv_msg_send(sess, reply, SESS_SRV_MSG_DATA + 4 + len,
							SESS_RELEASE_KERNEL);
	sess_refer_count_dec(sess);
}
/***************************************************************************
Handles a client message which does not require SQL parsing, i.e., the
message built in SQLExecute. The statement id in the message selects one of
the query graphs of the session. A create procedure graph is added to the
dictionary cache and acknowledged at once; a procedure call graph is replaced
by a parsed copy of the procedure, its input parameters are read from the
message, and its query threads are run. The kernel mutex is released and
reacquired around the dictionary operations and the execution. */
static
void
sess_receive_command(
/*=================*/
	sess_t*	sess,	/* in: session */
	byte*	cli_msg,/* in: client message */
	ulint	len,	/* in: message length */
	ulint	type)	/* in: message type */
{
	proc_node_t*	proc_node;
	call_node_t*	call_node;
	dict_proc_t*	dict_proc;
	que_thr_t*	thr;
	que_t*		graph;
	ulint		stat_id;

	UT_NOT_USED(len);
	UT_NOT_USED(type);

	ut_ad(mutex_own(&kernel_mutex));

	sess->client_waits = TRUE;

	stat_id = mach_read_from_4(cli_msg + SESS_CLI_MSG_DATA);

	/* Search the list of query graphs for the statement */

	for (graph = UT_LIST_GET_FIRST(sess->graphs);
	     graph != NULL && graph->id != stat_id;
	     graph = UT_LIST_GET_NEXT(graphs, graph)) {

		/* No op */
	}

	if (graph == NULL) {
		/* No graph has the requested id: error */

		sess_error_low(sess, SESS_ERR_STMT_NOT_FOUND, NULL);

		return;
	}

	if (graph->state != QUE_FORK_COMMAND_WAIT) {
		sess_error_low(sess, SESS_ERR_STMT_NOT_READY, NULL);

		return;
	}

/*	printf("To execute stat %lu\n", stat_id); */

	if (graph->fork_type != QUE_FORK_PROCEDURE_CALL) {
		/* A create procedure command: the procedure is added to
		the dictionary cache and nothing is executed */

		ut_ad(graph->fork_type == QUE_FORK_PROCEDURE);

		mutex_exit(&kernel_mutex);

		proc_node = que_fork_get_child(graph);

		dict_proc = dict_mem_procedure_create(
					proc_node->proc_id->name,
					proc_node->sym_tab->sql_string,
					graph);

		dict_procedure_add_to_cache(dict_proc);

		mutex_enter(&kernel_mutex);

		sess_srv_msg_send_simple(sess, SESS_SRV_SUCCESS,
							SESS_RELEASE_KERNEL);
		return;
	}

	/* A stored procedure call: fetch a parsed copy of the procedure
	from the dictionary cache */

	mutex_exit(&kernel_mutex);

	call_node = que_fork_get_child(graph);

	graph = dict_procedure_reserve_parsed_copy(call_node->procedure_def);

	graph->trx = sess->trx;

	/* The procedure input parameters follow the statement id in the
	message */

	pars_proc_read_input_params_from_buf(graph,
					cli_msg + SESS_CLI_MSG_DATA + 4);
	mutex_enter(&kernel_mutex);

	/* Pick a query thread to execute the graph */

	thr = que_fork_start_command(graph, SESS_COMM_EXECUTE, 0);

	ut_ad(thr);

	sess->trx->graph = graph;

	mutex_exit(&kernel_mutex);

	/* The query threads are run without the kernel mutex */

	que_run_threads(thr);

	mutex_enter(&kernel_mutex);
}
/***************************************************************************
/***************************************************************************
When a command has been completed, this function sends the message about it
When a command has been completed, this function sends the message about it
to the client. */
to the client. */
...
@@ -936,239 +584,3 @@ sess_command_completed_message(
...
@@ -936,239 +584,3 @@ sess_command_completed_message(
SESS_RELEASE_KERNEL
);
SESS_RELEASE_KERNEL
);
mutex_exit
(
&
kernel_mutex
);
mutex_exit
(
&
kernel_mutex
);
}
}
/***************************************************************************
Handles a break message from the client by raising the error
SESS_ERR_BREAK_BY_CLIENT on the session. The caller must own the kernel
mutex. */
static
void
sess_receive_break(
/*===============*/
	sess_t*	sess)	/* in: session */
{
	ut_ad(mutex_own(&kernel_mutex));

	/* Raising the error is meant to roll back the latest incomplete
	SQL statement */

	sess_error_low(sess, SESS_ERR_BREAK_BY_CLIENT, NULL);
}
/***************************************************************************
Handles a message which a client has sent to an existing session. The message
is validated (session state, consistency, message number), a break request is
served at once, and any other complete message is dispatched either to the
SQL prepare handler or to the command handler. NOTE: the handlers release the
kernel mutex temporarily, e.g., when an SQL string is parsed. The caller must
own the kernel mutex. */

void
sess_receive_msg_rel_kernel(
/*========================*/
	sess_t*	sess,	/* in: session */
	byte*	str,	/* in: message string */
	ulint	len)	/* in: message length */
{
	dulint	msg_no;
	ulint	msg_type;
	ulint	cont;
	ibool	assembled	= FALSE;
	ibool	was_waiting;

	ut_ad(mutex_own(&kernel_mutex));
	ut_ad(!sess->disconnecting);

	/* Remember whether the client was already waiting for a reply
	before this message arrived */

	was_waiting = sess->client_waits;

	sess->client_waits = TRUE;

	if (sess->state == SESS_ERROR) {
		/* An error message has been buffered: send it */

		sess_srv_msg_send_error(sess);

		return;
	}

	if (sess_cli_msg_check_consistency(str, len) == FALSE) {
		/* The client message is corrupted */

		sess_error_low(sess, SESS_ERR_MSG_CORRUPTED, NULL);

		return;
	}

	msg_no = sess_cli_msg_get_msg_no(str);

	UT_DULINT_INC(sess->msgs_recv);

	if (!UT_DULINT_EQ(msg_no, sess->msgs_recv)) {
		/* The message number is not the expected one: raise an
		error and continue counting from the received number */

		sess_error_low(sess, SESS_ERR_MSG_LOST, NULL);

		sess->msgs_recv = msg_no;

		return;
	}

	msg_type = sess_cli_msg_get_type(str);

	if (msg_type == SESS_CLI_BREAK_EXECUTION) {
		sess_receive_break(sess);

		return;
	}

	if (was_waiting) {
		/* The client sent an extraneous message which is not a
		break command: an error */

		sess_error_low(sess, SESS_ERR_EXTRANEOUS_MSG, NULL);

		return;
	}

	/*-----------------------------------------------------------*/
	/* Big messages */

	cont = sess_cli_msg_get_continue(str);

	if (cont != SESS_MSG_SINGLE_PART) {
		ut_error; /* Not in use */

		assembled = sess_receive_msg_part(sess, str, len);

		if (!assembled) {

			return;
		}

		/* The big message is complete: process the assembled
		buffer instead of the last part */

		str = sess->big_msg;
		len = sess->big_msg_len;
		sess->big_msg = NULL;

	} else if (sess->big_msg) {
		/* A single part message arrived in the middle of a big
		message */

		sess_error_low(sess, SESS_ERR_MSG_LOST, NULL);

		return;
	}

	/*-----------------------------------------------------------*/
	/* A complete message from the client has been received */

	ut_ad(!UT_LIST_GET_FIRST((sess->trx)->signals));

	if (msg_type == SESS_CLI_PREPARE) {
		/* The kernel mutex is temporarily released when the SQL
		string is parsed */

		sess_receive_prepare(sess, str, len);
	} else {
		/* The kernel mutex is temporarily released when the
		command is executed */

		sess_receive_command(sess, str, len, msg_type);
	}

	if (assembled) {
		mem_free(str);
	}
}
/***********************************************************************
Opens a new connection and creates a session, provided that the message is a
connect message: its session id must be zero and its type SESS_CLI_CONNECT.
An accept message is then sent to the new session. A connect message whose
length is not exactly SESS_CLI_MSG_DATA is an assertion failure. */
static
ibool
sess_open_connection(
/*=================*/
			/* out: TRUE if a session was opened, FALSE if
			the message is not a valid connect message */
	byte*	str,	/* in: message string */
	ulint	len,	/* in: string length */
	byte*	addr,	/* in: user address string */
	ulint	alen)	/* in: user address length */
{
	dulint	id_in_msg;
	sess_t*	new_sess;

	id_in_msg = mach_read_from_8(str + SESS_CLI_MSG_SESS_ID);

	if (!UT_DULINT_EQ(id_in_msg, ut_dulint_zero)) {
		/* Not a valid connect message: the session id is set */

		return(FALSE);
	}

	if (sess_cli_msg_get_type(str) != SESS_CLI_CONNECT) {
		/* Not a valid connect message: wrong message type */

		return(FALSE);
	}

	ut_a(len == SESS_CLI_MSG_DATA);

	new_sess = sess_open(srv_sys->endpoint, addr, alen);

	sess_srv_msg_send_simple(new_sess, SESS_SRV_ACCEPT_CONNECT,
						SESS_NOT_RELEASE_KERNEL);
	return(TRUE);
}
/***********************************************************************
Entry point for a message received from a client; called by a SRV_COM
thread. Under the kernel mutex, the message is either routed to the session
it is addressed to, or, if there is no such session and the message is
consistent, tried as a connect message which opens a new session. A message
which cannot be placed, or which is sent to a disconnecting session, ends in
ut_error. */

void
sess_process_cli_msg(
/*=================*/
	byte*	str,	/* in: message string */
	ulint	len,	/* in: string length */
	byte*	addr,	/* in: address string */
	ulint	alen)	/* in: address length */
{
	sess_t*	target;

	UT_NOT_USED(addr);
	UT_NOT_USED(alen);

	mutex_enter(&kernel_mutex);

	target = sess_cli_msg_get_sess(str, len);

	if (target == NULL) {
		/* No matching session: a consistent message may still be a
		connect message */

		/* printf("%s\n", addr); */

		if (sess_cli_msg_check_consistency(str, len)
		    && sess_open_connection(str, len, addr, alen)) {

			mutex_exit(&kernel_mutex);

			return;
		}

		/* The message makes no sense: an error entry should be
		written to the system error log */

		/* srv_err_log_insert(
			"MESSAGE SENT TO AN UNKNOWN SESSION");*/
		ut_error;

		mutex_exit(&kernel_mutex);

		return;
	}

	if (target->disconnecting) {

		/* srv_err_log_insert(
			"MESSAGE SENT TO A DISCONNECTING SESSION");*/
		ut_error;

		mutex_exit(&kernel_mutex);

		return;
	}

	sess_receive_msg_rel_kernel(target, str, len);

	mutex_exit(&kernel_mutex);
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment