Commit 9469efa5 authored by inaam

branches/innodb+

Provide support for native AIO on Linux.

rb://46 approved by: Marko
parent 157c2901
...@@ -4436,11 +4436,14 @@ fil_aio_wait( ...@@ -4436,11 +4436,14 @@ fil_aio_wait(
ut_ad(fil_validate()); ut_ad(fil_validate());
if (os_aio_use_native_aio) { if (srv_use_native_aio) {
srv_set_io_thread_op_info(segment, "native aio handle"); srv_set_io_thread_op_info(segment, "native aio handle");
#ifdef WIN_ASYNC_IO #ifdef WIN_ASYNC_IO
ret = os_aio_windows_handle(segment, 0, &fil_node, ret = os_aio_windows_handle(segment, 0, &fil_node,
&message, &type); &message, &type);
#elif defined(LINUX_NATIVE_AIO)
ret = os_aio_linux_handle(segment, &fil_node,
&message, &type);
#else #else
ret = 0; /* Eliminate compiler warning */ ret = 0; /* Eliminate compiler warning */
ut_error; ut_error;
......
...@@ -9573,6 +9573,11 @@ static MYSQL_SYSVAR_STR(version, innodb_version_str, ...@@ -9573,6 +9573,11 @@ static MYSQL_SYSVAR_STR(version, innodb_version_str,
PLUGIN_VAR_NOCMDOPT | PLUGIN_VAR_READONLY, PLUGIN_VAR_NOCMDOPT | PLUGIN_VAR_READONLY,
"InnoDB version", NULL, NULL, INNODB_VERSION_STR); "InnoDB version", NULL, NULL, INNODB_VERSION_STR);
static MYSQL_SYSVAR_BOOL(use_native_aio, srv_use_native_aio,
PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
"Use native AIO if supported on this platform.",
NULL, NULL, TRUE);
static struct st_mysql_sys_var* innobase_system_variables[]= { static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(additional_mem_pool_size), MYSQL_SYSVAR(additional_mem_pool_size),
MYSQL_SYSVAR(autoextend_increment), MYSQL_SYSVAR(autoextend_increment),
...@@ -9619,6 +9624,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { ...@@ -9619,6 +9624,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(thread_sleep_delay), MYSQL_SYSVAR(thread_sleep_delay),
MYSQL_SYSVAR(autoinc_lock_mode), MYSQL_SYSVAR(autoinc_lock_mode),
MYSQL_SYSVAR(version), MYSQL_SYSVAR(version),
MYSQL_SYSVAR(use_native_aio),
NULL NULL
}; };
......
...@@ -51,12 +51,6 @@ typedef int os_file_t; ...@@ -51,12 +51,6 @@ typedef int os_file_t;
extern ulint os_innodb_umask; extern ulint os_innodb_umask;
/* If this flag is TRUE, then we will use the native aio of the
OS (provided we compiled Innobase with it in), otherwise we will
use simulated aio we build below with threads */
extern ibool os_aio_use_native_aio;
#define OS_FILE_SECTOR_SIZE 512 #define OS_FILE_SECTOR_SIZE 512
/* The next value should be smaller or equal to the smallest sector size used /* The next value should be smaller or equal to the smallest sector size used
...@@ -98,6 +92,7 @@ log. */ ...@@ -98,6 +92,7 @@ log. */
to become available again */ to become available again */
#define OS_FILE_SHARING_VIOLATION 76 #define OS_FILE_SHARING_VIOLATION 76
#define OS_FILE_ERROR_NOT_SPECIFIED 77 #define OS_FILE_ERROR_NOT_SPECIFIED 77
#define OS_FILE_AIO_INTERRUPTED 78
/* Types for aio operations */ /* Types for aio operations */
#define OS_FILE_READ 10 #define OS_FILE_READ 10
...@@ -556,9 +551,10 @@ in the three first aio arrays is the parameter n_segments given to the ...@@ -556,9 +551,10 @@ in the three first aio arrays is the parameter n_segments given to the
function. The caller must create an i/o handler thread for each segment in function. The caller must create an i/o handler thread for each segment in
the four first arrays, but not for the sync aio array. */ the four first arrays, but not for the sync aio array. */
UNIV_INTERN UNIV_INTERN
void ibool
os_aio_init( os_aio_init(
/*========*/ /*========*/
/* out: TRUE on success. */
ulint n, /* in: maximum number of pending aio operations ulint n, /* in: maximum number of pending aio operations
allowed; n must be divisible by n_segments */ allowed; n must be divisible by n_segments */
ulint n_segments, /* in: combined number of segments in the four ulint n_segments, /* in: combined number of segments in the four
...@@ -737,4 +733,32 @@ innobase_mysql_tmpfile(void); ...@@ -737,4 +733,32 @@ innobase_mysql_tmpfile(void);
/* out: temporary file descriptor, or < 0 on error */ /* out: temporary file descriptor, or < 0 on error */
#endif /* !UNIV_HOTBACKUP && !__NETWARE__ */ #endif /* !UNIV_HOTBACKUP && !__NETWARE__ */
#if defined(LINUX_NATIVE_AIO)
/**************************************************************************
This function is only used in Linux native asynchronous i/o.
Waits for an aio operation to complete. This function is used to wait
for completed requests. The aio array of pending requests is divided
into segments. The thread specifies which segment or slot it wants to wait
for. NOTE: this function will also take care of freeing the aio slot,
therefore no other thread is allowed to do the freeing! */
UNIV_INTERN
ibool
os_aio_linux_handle(
/*================*/
/* out: TRUE if the IO was successful */
ulint global_seg, /* in: segment number in the aio array
to wait for; segment 0 is the ibuf
i/o thread, segment 1 is log i/o thread,
then follow the non-ibuf read threads,
and the last are the non-ibuf write
threads. */
fil_node_t**message1, /* out: the messages passed with the */
void** message2, /* aio request; note that in case the
aio operation failed, these output
parameters are valid and can be used to
restart the operation. */
ulint* type); /* out: OS_FILE_WRITE or ..._READ */
#endif /* LINUX_NATIVE_AIO */
#endif #endif
...@@ -68,6 +68,11 @@ extern ulint srv_check_file_format_at_startup; ...@@ -68,6 +68,11 @@ extern ulint srv_check_file_format_at_startup;
on duplicate key checking and foreign key checking */ on duplicate key checking and foreign key checking */
extern ibool srv_locks_unsafe_for_binlog; extern ibool srv_locks_unsafe_for_binlog;
/* If this flag is TRUE, then we will use the native aio of the
OS (provided we compiled Innobase with it in), otherwise we will
use simulated aio built with threads.
Currently we support native aio on Windows and Linux. */
extern my_bool srv_use_native_aio;
extern ulint srv_n_data_files; extern ulint srv_n_data_files;
extern char** srv_data_file_names; extern char** srv_data_file_names;
extern ulint* srv_data_file_sizes; extern ulint* srv_data_file_sizes;
......
...@@ -162,6 +162,9 @@ operations (very slow); also UNIV_DEBUG must be defined */ ...@@ -162,6 +162,9 @@ operations (very slow); also UNIV_DEBUG must be defined */
for compressed pages */ for compressed pages */
#define UNIV_ZIP_COPY /* call page_zip_copy_recs() #define UNIV_ZIP_COPY /* call page_zip_copy_recs()
more often */ more often */
#define UNIV_AIO_DEBUG /* prints info about
submitted and reaped AIO
requests to the log. */
#endif #endif
#define UNIV_BTR_DEBUG /* check B-tree links */ #define UNIV_BTR_DEBUG /* check B-tree links */
......
This diff is collapsed.
...@@ -12,6 +12,14 @@ MYSQL_PLUGIN_ACTIONS(innobase, [ ...@@ -12,6 +12,14 @@ MYSQL_PLUGIN_ACTIONS(innobase, [
AC_C_BIGENDIAN AC_C_BIGENDIAN
case "$target_os" in case "$target_os" in
lin*) lin*)
AC_CHECK_HEADER(libaio.h,
AC_CHECK_LIB(aio, io_setup,
LIBS="$LIBS -laio"
AC_DEFINE(LINUX_NATIVE_AIO, [1],
[Linux native async I/O support]),
AC_MSG_WARN([No Linux native async I/O])),
AC_MSG_WARN([No Linux native async I/O]))
CFLAGS="$CFLAGS -DUNIV_LINUX";; CFLAGS="$CFLAGS -DUNIV_LINUX";;
hpux10*) hpux10*)
CFLAGS="$CFLAGS -DUNIV_MUST_NOT_INLINE -DUNIV_HPUX -DUNIV_HPUX10";; CFLAGS="$CFLAGS -DUNIV_MUST_NOT_INLINE -DUNIV_HPUX -DUNIV_HPUX10";;
......
...@@ -102,6 +102,12 @@ UNIV_INTERN ulint srv_check_file_format_at_startup = DICT_TF_FORMAT_MAX; ...@@ -102,6 +102,12 @@ UNIV_INTERN ulint srv_check_file_format_at_startup = DICT_TF_FORMAT_MAX;
on duplicate key checking and foreign key checking */ on duplicate key checking and foreign key checking */
UNIV_INTERN ibool srv_locks_unsafe_for_binlog = FALSE; UNIV_INTERN ibool srv_locks_unsafe_for_binlog = FALSE;
/* If this flag is TRUE, then we will use the native aio of the
OS (provided we compiled Innobase with it in), otherwise we will
use simulated aio built with threads.
Currently we support native aio on Windows and Linux. */
UNIV_INTERN my_bool srv_use_native_aio = TRUE;
UNIV_INTERN ulint srv_n_data_files = 0; UNIV_INTERN ulint srv_n_data_files = 0;
UNIV_INTERN char** srv_data_file_names = NULL; UNIV_INTERN char** srv_data_file_names = NULL;
/* size in database pages */ /* size in database pages */
......
...@@ -969,6 +969,7 @@ innobase_start_or_create_for_mysql(void) ...@@ -969,6 +969,7 @@ innobase_start_or_create_for_mysql(void)
ibool log_file_created; ibool log_file_created;
ibool log_created = FALSE; ibool log_created = FALSE;
ibool log_opened = FALSE; ibool log_opened = FALSE;
ibool success;
ib_uint64_t min_flushed_lsn; ib_uint64_t min_flushed_lsn;
ib_uint64_t max_flushed_lsn; ib_uint64_t max_flushed_lsn;
#ifdef UNIV_LOG_ARCHIVE #ifdef UNIV_LOG_ARCHIVE
...@@ -1071,7 +1072,6 @@ innobase_start_or_create_for_mysql(void) ...@@ -1071,7 +1072,6 @@ innobase_start_or_create_for_mysql(void)
srv_is_being_started = TRUE; srv_is_being_started = TRUE;
srv_startup_is_before_trx_rollback_phase = TRUE; srv_startup_is_before_trx_rollback_phase = TRUE;
os_aio_use_native_aio = FALSE;
#ifdef __WIN__ #ifdef __WIN__
if (os_get_os_version() == OS_WIN95 if (os_get_os_version() == OS_WIN95
...@@ -1083,12 +1083,30 @@ innobase_start_or_create_for_mysql(void) ...@@ -1083,12 +1083,30 @@ innobase_start_or_create_for_mysql(void)
but when run in conjunction with InnoDB Hot Backup, it seemed but when run in conjunction with InnoDB Hot Backup, it seemed
to corrupt the data files. */ to corrupt the data files. */
os_aio_use_native_aio = FALSE; srv_use_native_aio = FALSE;
} else { } else {
/* On Win 2000 and XP use async i/o */ /* On Win 2000 and XP use async i/o */
os_aio_use_native_aio = TRUE; srv_use_native_aio = TRUE;
} }
#elif defined(LINUX_NATIVE_AIO)
if (srv_use_native_aio) {
ut_print_timestamp(stderr);
fprintf(stderr,
" InnoDB: Using Linux native AIO\n");
}
#else
/* Currently native AIO is supported only on Windows and Linux,
and only when the support is compiled in. In all other
cases, we ignore the setting of innodb_use_native_aio. */
/* TODO: comment this out after internal testing. */
fprintf(stderr, "Ignoring innodb_use_native_aio\n");
srv_use_native_aio = FALSE;
#endif #endif
if (srv_file_flush_method_str == NULL) { if (srv_file_flush_method_str == NULL) {
/* These are the default options */ /* These are the default options */
...@@ -1113,11 +1131,11 @@ innobase_start_or_create_for_mysql(void) ...@@ -1113,11 +1131,11 @@ innobase_start_or_create_for_mysql(void)
#else #else
} else if (0 == ut_strcmp(srv_file_flush_method_str, "normal")) { } else if (0 == ut_strcmp(srv_file_flush_method_str, "normal")) {
srv_win_file_flush_method = SRV_WIN_IO_NORMAL; srv_win_file_flush_method = SRV_WIN_IO_NORMAL;
os_aio_use_native_aio = FALSE; srv_use_native_aio = FALSE;
} else if (0 == ut_strcmp(srv_file_flush_method_str, "unbuffered")) { } else if (0 == ut_strcmp(srv_file_flush_method_str, "unbuffered")) {
srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED; srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
os_aio_use_native_aio = FALSE; srv_use_native_aio = FALSE;
} else if (0 == ut_strcmp(srv_file_flush_method_str, } else if (0 == ut_strcmp(srv_file_flush_method_str,
"async_unbuffered")) { "async_unbuffered")) {
...@@ -1210,19 +1228,38 @@ innobase_start_or_create_for_mysql(void) ...@@ -1210,19 +1228,38 @@ innobase_start_or_create_for_mysql(void)
srv_n_file_io_threads = SRV_MAX_N_IO_THREADS; srv_n_file_io_threads = SRV_MAX_N_IO_THREADS;
} }
if (!os_aio_use_native_aio) { if (!srv_use_native_aio) {
/* In simulated aio we currently have use only for 4 threads */ /* In simulated aio we currently have use only for 4 threads */
srv_n_file_io_threads = 4; srv_n_file_io_threads = 4;
os_aio_init(8 * SRV_N_PENDING_IOS_PER_THREAD success = os_aio_init(8 * SRV_N_PENDING_IOS_PER_THREAD *
* srv_n_file_io_threads, srv_n_file_io_threads,
srv_n_file_io_threads, srv_n_file_io_threads,
SRV_MAX_N_PENDING_SYNC_IOS); SRV_MAX_N_PENDING_SYNC_IOS);
if (!success) {
return(DB_ERROR);
}
} else { } else {
os_aio_init(SRV_N_PENDING_IOS_PER_THREAD /* Windows has a pending IO per thread limit.
* srv_n_file_io_threads, Linux does not have any such restriction.
The question of what should be segment size
is a trade off. The larger size means longer
linear searches through the array and a smaller
value can lead to array being full, causing
unnecessary delays. The following value
for Linux is fairly arbitrary and needs to be
tested and tuned. */
success = os_aio_init(
#if defined(LINUX_NATIVE_AIO)
8 *
#endif /* LINUX_NATIVE_AIO */
SRV_N_PENDING_IOS_PER_THREAD *
srv_n_file_io_threads,
srv_n_file_io_threads, srv_n_file_io_threads,
SRV_MAX_N_PENDING_SYNC_IOS); SRV_MAX_N_PENDING_SYNC_IOS);
if (!success) {
return(DB_ERROR);
}
} }
fil_init(srv_max_n_open_files); fil_init(srv_max_n_open_files);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment