Commit 233fd792 authored by Vladislav Vaintroub

improve Innodb locking primitives on Windows (MySQL Bug#52102, and fix...

Improve InnoDB locking primitives on Windows (MySQL Bug#52102), and fix OS_FILE_LIMIT - on Windows it is about 16 million.
parent 37f464f3
...@@ -374,7 +374,7 @@ inline ulonglong double2ulonglong(double d) ...@@ -374,7 +374,7 @@ inline ulonglong double2ulonglong(double d)
#define FN_DEVCHAR ':' #define FN_DEVCHAR ':'
#define FN_NETWORK_DRIVES /* Uses \\ to indicate network drives */ #define FN_NETWORK_DRIVES /* Uses \\ to indicate network drives */
#define FN_NO_CASE_SENCE /* Files are not case-sensitive */ #define FN_NO_CASE_SENCE /* Files are not case-sensitive */
#define OS_FILE_LIMIT 2048 #define OS_FILE_LIMIT 16*1024*1024
#define DO_NOT_REMOVE_THREAD_WRAPPERS #define DO_NOT_REMOVE_THREAD_WRAPPERS
#define thread_safe_increment(V,L) InterlockedIncrement((long*) &(V)) #define thread_safe_increment(V,L) InterlockedIncrement((long*) &(V))
......
...@@ -84,9 +84,7 @@ SET(INNODB_PLUGIN_SOURCES btr/btr0btr.c btr/btr0cur.c btr/btr0pcur.c btr/btr0sea ...@@ -84,9 +84,7 @@ SET(INNODB_PLUGIN_SOURCES btr/btr0btr.c btr/btr0cur.c btr/btr0pcur.c btr/btr0sea
usr/usr0sess.c usr/usr0sess.c
ut/ut0byte.c ut/ut0dbg.c ut/ut0mem.c ut/ut0rbt.c ut/ut0rnd.c ut/ut0ut.c ut/ut0vec.c ut/ut0byte.c ut/ut0dbg.c ut/ut0mem.c ut/ut0rbt.c ut/ut0rnd.c ut/ut0ut.c ut/ut0vec.c
ut/ut0list.c ut/ut0wqueue.c) ut/ut0list.c ut/ut0wqueue.c)
# Windows atomics do not perform well. Disable Windows atomics by default.
# See bug#52102 for details.
#ADD_DEFINITIONS(-DHAVE_WINDOWS_ATOMICS -DINNODB_RW_LOCKS_USE_ATOMICS -DHAVE_IB_PAUSE_INSTRUCTION)
ADD_DEFINITIONS(-DHAVE_IB_PAUSE_INSTRUCTION) ADD_DEFINITIONS(-DHAVE_IB_PAUSE_INSTRUCTION)
MYSQL_STORAGE_ENGINE(INNODB_PLUGIN) MYSQL_STORAGE_ENGINE(INNODB_PLUGIN)
...@@ -182,6 +182,10 @@ log. */ ...@@ -182,6 +182,10 @@ log. */
#define OS_WIN95 2 /*!< Microsoft Windows 95 */ #define OS_WIN95 2 /*!< Microsoft Windows 95 */
#define OS_WINNT 3 /*!< Microsoft Windows NT 3.x */ #define OS_WINNT 3 /*!< Microsoft Windows NT 3.x */
#define OS_WIN2000 4 /*!< Microsoft Windows 2000 */ #define OS_WIN2000 4 /*!< Microsoft Windows 2000 */
#define OS_WINXP 5 /*!< Microsoft Windows XP */
#define OS_WINVISTA 6 /*!< Microsoft Windows Vista */
#define OS_WIN7 7 /*!< Microsoft Windows 7 */
extern ulint os_n_file_reads; extern ulint os_n_file_reads;
extern ulint os_n_file_writes; extern ulint os_n_file_writes;
......
...@@ -37,29 +37,19 @@ Created 9/6/1995 Heikki Tuuri ...@@ -37,29 +37,19 @@ Created 9/6/1995 Heikki Tuuri
#include "univ.i" #include "univ.i"
#include "ut0lst.h" #include "ut0lst.h"
#ifdef __WIN__ #ifdef _WIN32
/** Native event (slow)*/
typedef HANDLE os_native_event_t;
/** Native mutex */ /** Native mutex */
#define os_fast_mutex_t CRITICAL_SECTION typedef CRITICAL_SECTION os_fast_mutex_t;
/** Native condition variable */
/** Native event */ typedef CONDITION_VARIABLE os_cond_t;
typedef HANDLE os_native_event_t;
/** Operating system event */
typedef struct os_event_struct os_event_struct_t;
/** Operating system event handle */
typedef os_event_struct_t* os_event_t;
/** An asynchronous signal sent between threads */
struct os_event_struct {
os_native_event_t handle;
/*!< Windows event */
UT_LIST_NODE_T(os_event_struct_t) os_event_list;
/*!< list of all created events */
};
#else #else
/** Native mutex */ /** Native mutex */
typedef pthread_mutex_t os_fast_mutex_t; typedef pthread_mutex_t os_fast_mutex_t;
/** Native condition variable */
typedef pthread_cond_t os_cond_t;
#endif
/** Operating system event */ /** Operating system event */
typedef struct os_event_struct os_event_struct_t; typedef struct os_event_struct os_event_struct_t;
...@@ -68,6 +58,9 @@ typedef os_event_struct_t* os_event_t; ...@@ -68,6 +58,9 @@ typedef os_event_struct_t* os_event_t;
/** An asynchronous signal sent between threads */ /** An asynchronous signal sent between threads */
struct os_event_struct { struct os_event_struct {
#ifdef _WIN32
HANDLE handle; /*!< kernel event object, slow, used on older Windows */
#endif
os_fast_mutex_t os_mutex; /*!< this mutex protects the next os_fast_mutex_t os_mutex; /*!< this mutex protects the next
fields */ fields */
ibool is_set; /*!< this is TRUE when the event is ibool is_set; /*!< this is TRUE when the event is
...@@ -76,12 +69,14 @@ struct os_event_struct { ...@@ -76,12 +69,14 @@ struct os_event_struct {
this event */ this event */
ib_int64_t signal_count; /*!< this is incremented each time ib_int64_t signal_count; /*!< this is incremented each time
the event becomes signaled */ the event becomes signaled */
pthread_cond_t cond_var; /*!< condition variable is used in os_cond_t cond_var; /*!< condition variable is used in
waiting for the event */ waiting for the event */
UT_LIST_NODE_T(os_event_struct_t) os_event_list; UT_LIST_NODE_T(os_event_struct_t) os_event_list;
/*!< list of all created events */ /*!< list of all created events */
}; };
#endif
/** Operating system mutex */ /** Operating system mutex */
typedef struct os_mutex_struct os_mutex_str_t; typedef struct os_mutex_struct os_mutex_str_t;
...@@ -198,21 +193,6 @@ os_event_wait_time( ...@@ -198,21 +193,6 @@ os_event_wait_time(
os_event_t event, /*!< in: event to wait */ os_event_t event, /*!< in: event to wait */
ulint time); /*!< in: timeout in microseconds, or ulint time); /*!< in: timeout in microseconds, or
OS_SYNC_INFINITE_TIME */ OS_SYNC_INFINITE_TIME */
#ifdef __WIN__
/**********************************************************//**
Waits for any event in an OS native event array. Returns if even a single
one is signaled or becomes signaled.
@return index of the event which was signaled */
UNIV_INTERN
ulint
os_event_wait_multiple(
/*===================*/
ulint n, /*!< in: number of events in the
array */
os_native_event_t* native_event_array);
/*!< in: pointer to an array of event
handles */
#endif
/*********************************************************//** /*********************************************************//**
Creates an operating system mutex semaphore. Because these are slow, the Creates an operating system mutex semaphore. Because these are slow, the
mutex semaphore of InnoDB itself (mutex_t) should be used where possible. mutex semaphore of InnoDB itself (mutex_t) should be used where possible.
...@@ -385,7 +365,7 @@ Returns the old value of *ptr, atomically sets *ptr to new_val */ ...@@ -385,7 +365,7 @@ Returns the old value of *ptr, atomically sets *ptr to new_val */
# define os_atomic_test_and_set_byte(ptr, new_val) \ # define os_atomic_test_and_set_byte(ptr, new_val) \
atomic_swap_uchar(ptr, new_val) atomic_swap_uchar(ptr, new_val)
#elif defined(HAVE_WINDOWS_ATOMICS) #elif defined(_WIN32)
#define HAVE_ATOMIC_BUILTINS #define HAVE_ATOMIC_BUILTINS
......
...@@ -28,8 +28,7 @@ Created 9/6/1995 Heikki Tuuri ...@@ -28,8 +28,7 @@ Created 9/6/1995 Heikki Tuuri
#endif #endif
/**********************************************************//** /**********************************************************//**
Acquires ownership of a fast mutex. Currently in Windows this is the same Acquires ownership of a fast mutex.
as os_fast_mutex_lock!
@return 0 if success, != 0 if was reserved by another thread */ @return 0 if success, != 0 if was reserved by another thread */
UNIV_INLINE UNIV_INLINE
ulint ulint
...@@ -38,9 +37,9 @@ os_fast_mutex_trylock( ...@@ -38,9 +37,9 @@ os_fast_mutex_trylock(
os_fast_mutex_t* fast_mutex) /*!< in: mutex to acquire */ os_fast_mutex_t* fast_mutex) /*!< in: mutex to acquire */
{ {
#ifdef __WIN__ #ifdef __WIN__
EnterCriticalSection(fast_mutex); if (TryEnterCriticalSection(fast_mutex))
return 0;
return(0); return(1);
#else #else
/* NOTE that the MySQL my_pthread.h redefines pthread_mutex_trylock /* NOTE that the MySQL my_pthread.h redefines pthread_mutex_trylock
so that it returns 0 on success. In the operating system so that it returns 0 on success. In the operating system
......
...@@ -106,7 +106,9 @@ extern ulint srv_check_file_format_at_startup; ...@@ -106,7 +106,9 @@ extern ulint srv_check_file_format_at_startup;
on duplicate key checking and foreign key checking */ on duplicate key checking and foreign key checking */
extern ibool srv_locks_unsafe_for_binlog; extern ibool srv_locks_unsafe_for_binlog;
#endif /* !UNIV_HOTBACKUP */ #endif /* !UNIV_HOTBACKUP */
#ifdef __WIN__
extern ibool srv_use_native_conditions;
#endif
extern ulint srv_n_data_files; extern ulint srv_n_data_files;
extern char** srv_data_file_names; extern char** srv_data_file_names;
extern ulint* srv_data_file_sizes; extern ulint* srv_data_file_sizes;
......
...@@ -45,7 +45,7 @@ Created 9/5/1995 Heikki Tuuri ...@@ -45,7 +45,7 @@ Created 9/5/1995 Heikki Tuuri
extern my_bool timed_mutexes; extern my_bool timed_mutexes;
#endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */ #endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */
#ifdef HAVE_WINDOWS_ATOMICS #ifdef _WIN32
typedef LONG lock_word_t; /*!< On Windows, InterlockedExchange operates typedef LONG lock_word_t; /*!< On Windows, InterlockedExchange operates
on LONG variable */ on LONG variable */
#else #else
......
...@@ -121,7 +121,7 @@ struct os_aio_slot_struct{ ...@@ -121,7 +121,7 @@ struct os_aio_slot_struct{
which pending aio operation was which pending aio operation was
completed */ completed */
#ifdef WIN_ASYNC_IO #ifdef WIN_ASYNC_IO
os_event_t event; /*!< event object we need in the HANDLE handle; /*!< handle object we need in the
OVERLAPPED struct */ OVERLAPPED struct */
OVERLAPPED control; /*!< Windows control block for the OVERLAPPED control; /*!< Windows control block for the
aio request */ aio request */
...@@ -155,7 +155,7 @@ struct os_aio_array_struct{ ...@@ -155,7 +155,7 @@ struct os_aio_array_struct{
aio array outside the ibuf segment */ aio array outside the ibuf segment */
os_aio_slot_t* slots; /*!< Pointer to the slots in the array */ os_aio_slot_t* slots; /*!< Pointer to the slots in the array */
#ifdef __WIN__ #ifdef __WIN__
os_native_event_t* native_events; HANDLE* handles;
/*!< Pointer to an array of OS native /*!< Pointer to an array of OS native
event handles where we copied the event handles where we copied the
handles from slots, in the same handles from slots, in the same
...@@ -229,10 +229,16 @@ os_get_os_version(void) ...@@ -229,10 +229,16 @@ os_get_os_version(void)
} else if (os_info.dwPlatformId == VER_PLATFORM_WIN32_WINDOWS) { } else if (os_info.dwPlatformId == VER_PLATFORM_WIN32_WINDOWS) {
return(OS_WIN95); return(OS_WIN95);
} else if (os_info.dwPlatformId == VER_PLATFORM_WIN32_NT) { } else if (os_info.dwPlatformId == VER_PLATFORM_WIN32_NT) {
if (os_info.dwMajorVersion <= 4) { switch(os_info.dwMajorVersion){
return(OS_WINNT); case 3:
} else { case 4:
return(OS_WIN2000); return OS_WINNT;
case 5:
return (os_info.dwMinorVersion == 0)?OS_WIN2000 : OS_WINXP;
case 6:
return (os_info.dwMinorVersion == 0)?OS_WINVISTA : OS_WIN7;
default:
return OS_WIN7;
} }
} else { } else {
ut_error; ut_error;
...@@ -2272,13 +2278,12 @@ os_file_read( ...@@ -2272,13 +2278,12 @@ os_file_read(
#ifdef __WIN__ #ifdef __WIN__
BOOL ret; BOOL ret;
DWORD len; DWORD len;
DWORD ret2;
DWORD low;
DWORD high;
ibool retry; ibool retry;
#ifndef UNIV_HOTBACKUP OVERLAPPED overlapped;
ulint i;
#endif /* !UNIV_HOTBACKUP */ memset(&overlapped, 0, sizeof(overlapped));
overlapped.Offset = (DWORD)offset;
overlapped.OffsetHigh = (DWORD)offset_high;
/* On 64-bit Windows, ulint is 64 bits. But offset and n should be /* On 64-bit Windows, ulint is 64 bits. But offset and n should be
no more than 32 bits. */ no more than 32 bits. */
...@@ -2293,40 +2298,11 @@ os_file_read( ...@@ -2293,40 +2298,11 @@ os_file_read(
ut_ad(buf); ut_ad(buf);
ut_ad(n > 0); ut_ad(n > 0);
low = (DWORD) offset;
high = (DWORD) offset_high;
os_mutex_enter(os_file_count_mutex); os_mutex_enter(os_file_count_mutex);
os_n_pending_reads++; os_n_pending_reads++;
os_mutex_exit(os_file_count_mutex); os_mutex_exit(os_file_count_mutex);
#ifndef UNIV_HOTBACKUP ret = ReadFile(file, buf, (DWORD) n, &len, &overlapped);
/* Protect the seek / read operation with a mutex */
i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
os_mutex_enter(os_file_seek_mutexes[i]);
#endif /* !UNIV_HOTBACKUP */
ret2 = SetFilePointer(file, low, &high, FILE_BEGIN);
if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) {
#ifndef UNIV_HOTBACKUP
os_mutex_exit(os_file_seek_mutexes[i]);
#endif /* !UNIV_HOTBACKUP */
os_mutex_enter(os_file_count_mutex);
os_n_pending_reads--;
os_mutex_exit(os_file_count_mutex);
goto error_handling;
}
ret = ReadFile(file, buf, (DWORD) n, &len, NULL);
#ifndef UNIV_HOTBACKUP
os_mutex_exit(os_file_seek_mutexes[i]);
#endif /* !UNIV_HOTBACKUP */
os_mutex_enter(os_file_count_mutex); os_mutex_enter(os_file_count_mutex);
os_n_pending_reads--; os_n_pending_reads--;
...@@ -2355,9 +2331,6 @@ os_file_read( ...@@ -2355,9 +2331,6 @@ os_file_read(
(ulong)n, (ulong)offset_high, (ulong)n, (ulong)offset_high,
(ulong)offset, (long)ret); (ulong)offset, (long)ret);
#endif /* __WIN__ */ #endif /* __WIN__ */
#ifdef __WIN__
error_handling:
#endif
retry = os_file_handle_error(NULL, "read"); retry = os_file_handle_error(NULL, "read");
if (retry) { if (retry) {
...@@ -2399,13 +2372,13 @@ os_file_read_no_error_handling( ...@@ -2399,13 +2372,13 @@ os_file_read_no_error_handling(
#ifdef __WIN__ #ifdef __WIN__
BOOL ret; BOOL ret;
DWORD len; DWORD len;
DWORD ret2;
DWORD low;
DWORD high;
ibool retry; ibool retry;
#ifndef UNIV_HOTBACKUP OVERLAPPED overlapped;
ulint i;
#endif /* !UNIV_HOTBACKUP */ memset(&overlapped, 0, sizeof(overlapped));
overlapped.Offset = (DWORD)offset;
overlapped.OffsetHigh = (DWORD)offset_high;
/* On 64-bit Windows, ulint is 64 bits. But offset and n should be /* On 64-bit Windows, ulint is 64 bits. But offset and n should be
no more than 32 bits. */ no more than 32 bits. */
...@@ -2420,40 +2393,12 @@ os_file_read_no_error_handling( ...@@ -2420,40 +2393,12 @@ os_file_read_no_error_handling(
ut_ad(buf); ut_ad(buf);
ut_ad(n > 0); ut_ad(n > 0);
low = (DWORD) offset;
high = (DWORD) offset_high;
os_mutex_enter(os_file_count_mutex); os_mutex_enter(os_file_count_mutex);
os_n_pending_reads++; os_n_pending_reads++;
os_mutex_exit(os_file_count_mutex); os_mutex_exit(os_file_count_mutex);
#ifndef UNIV_HOTBACKUP ret = ReadFile(file, buf, (DWORD) n, &len, &overlapped);
/* Protect the seek / read operation with a mutex */
i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
os_mutex_enter(os_file_seek_mutexes[i]);
#endif /* !UNIV_HOTBACKUP */
ret2 = SetFilePointer(file, low, &high, FILE_BEGIN);
if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) {
#ifndef UNIV_HOTBACKUP
os_mutex_exit(os_file_seek_mutexes[i]);
#endif /* !UNIV_HOTBACKUP */
os_mutex_enter(os_file_count_mutex);
os_n_pending_reads--;
os_mutex_exit(os_file_count_mutex);
goto error_handling;
}
ret = ReadFile(file, buf, (DWORD) n, &len, NULL);
#ifndef UNIV_HOTBACKUP
os_mutex_exit(os_file_seek_mutexes[i]);
#endif /* !UNIV_HOTBACKUP */
os_mutex_enter(os_file_count_mutex); os_mutex_enter(os_file_count_mutex);
os_n_pending_reads--; os_n_pending_reads--;
...@@ -2476,9 +2421,6 @@ os_file_read_no_error_handling( ...@@ -2476,9 +2421,6 @@ os_file_read_no_error_handling(
return(TRUE); return(TRUE);
} }
#endif /* __WIN__ */ #endif /* __WIN__ */
#ifdef __WIN__
error_handling:
#endif
retry = os_file_handle_error_no_exit(NULL, "read"); retry = os_file_handle_error_no_exit(NULL, "read");
if (retry) { if (retry) {
...@@ -2531,14 +2473,14 @@ os_file_write( ...@@ -2531,14 +2473,14 @@ os_file_write(
#ifdef __WIN__ #ifdef __WIN__
BOOL ret; BOOL ret;
DWORD len; DWORD len;
DWORD ret2;
DWORD low;
DWORD high;
ulint n_retries = 0; ulint n_retries = 0;
ulint err; ulint err;
#ifndef UNIV_HOTBACKUP OVERLAPPED overlapped;
ulint i;
#endif /* !UNIV_HOTBACKUP */ memset(&overlapped, 0, sizeof(overlapped));
overlapped.Offset = (DWORD)offset;
overlapped.OffsetHigh = (DWORD)offset_high;
/* On 64-bit Windows, ulint is 64 bits. But offset and n should be /* On 64-bit Windows, ulint is 64 bits. But offset and n should be
no more than 32 bits. */ no more than 32 bits. */
...@@ -2551,50 +2493,12 @@ os_file_write( ...@@ -2551,50 +2493,12 @@ os_file_write(
ut_ad(buf); ut_ad(buf);
ut_ad(n > 0); ut_ad(n > 0);
retry: retry:
low = (DWORD) offset;
high = (DWORD) offset_high;
os_mutex_enter(os_file_count_mutex); os_mutex_enter(os_file_count_mutex);
os_n_pending_writes++; os_n_pending_writes++;
os_mutex_exit(os_file_count_mutex); os_mutex_exit(os_file_count_mutex);
#ifndef UNIV_HOTBACKUP ret = WriteFile(file, buf, (DWORD) n, &len, &overlapped);
/* Protect the seek / write operation with a mutex */
i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
os_mutex_enter(os_file_seek_mutexes[i]);
#endif /* !UNIV_HOTBACKUP */
ret2 = SetFilePointer(file, low, &high, FILE_BEGIN);
if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) {
#ifndef UNIV_HOTBACKUP
os_mutex_exit(os_file_seek_mutexes[i]);
#endif /* !UNIV_HOTBACKUP */
os_mutex_enter(os_file_count_mutex);
os_n_pending_writes--;
os_mutex_exit(os_file_count_mutex);
ut_print_timestamp(stderr);
fprintf(stderr,
" InnoDB: Error: File pointer positioning to"
" file %s failed at\n"
"InnoDB: offset %lu %lu. Operating system"
" error number %lu.\n"
"InnoDB: Some operating system error numbers"
" are described at\n"
"InnoDB: "
REFMAN "operating-system-error-codes.html\n",
name, (ulong) offset_high, (ulong) offset,
(ulong) GetLastError());
return(FALSE);
}
ret = WriteFile(file, buf, (DWORD) n, &len, NULL);
/* Always do fsync to reduce the probability that when the OS crashes, /* Always do fsync to reduce the probability that when the OS crashes,
a database page is only partially physically written to disk. */ a database page is only partially physically written to disk. */
...@@ -2605,10 +2509,6 @@ os_file_write( ...@@ -2605,10 +2509,6 @@ os_file_write(
} }
# endif /* UNIV_DO_FLUSH */ # endif /* UNIV_DO_FLUSH */
#ifndef UNIV_HOTBACKUP
os_mutex_exit(os_file_seek_mutexes[i]);
#endif /* !UNIV_HOTBACKUP */
os_mutex_enter(os_file_count_mutex); os_mutex_enter(os_file_count_mutex);
os_n_pending_writes--; os_n_pending_writes--;
os_mutex_exit(os_file_count_mutex); os_mutex_exit(os_file_count_mutex);
...@@ -3012,7 +2912,7 @@ os_aio_array_create( ...@@ -3012,7 +2912,7 @@ os_aio_array_create(
array->n_reserved = 0; array->n_reserved = 0;
array->slots = ut_malloc(n * sizeof(os_aio_slot_t)); array->slots = ut_malloc(n * sizeof(os_aio_slot_t));
#ifdef __WIN__ #ifdef __WIN__
array->native_events = ut_malloc(n * sizeof(os_native_event_t)); array->handles = ut_malloc(n * sizeof(HANDLE));
#endif #endif
for (i = 0; i < n; i++) { for (i = 0; i < n; i++) {
slot = os_aio_array_get_nth_slot(array, i); slot = os_aio_array_get_nth_slot(array, i);
...@@ -3020,13 +2920,14 @@ os_aio_array_create( ...@@ -3020,13 +2920,14 @@ os_aio_array_create(
slot->pos = i; slot->pos = i;
slot->reserved = FALSE; slot->reserved = FALSE;
#ifdef WIN_ASYNC_IO #ifdef WIN_ASYNC_IO
slot->event = os_event_create(NULL); slot->handle= CreateEvent(NULL,TRUE, FALSE, NULL);
over = &(slot->control); over = &(slot->control);
over->hEvent = slot->event->handle; over->hEvent = slot->handle;
*((array->native_events) + i) = over->hEvent; *((array->handles) + i) = over->hEvent;
#endif #endif
} }
...@@ -3046,12 +2947,12 @@ os_aio_array_free( ...@@ -3046,12 +2947,12 @@ os_aio_array_free(
for (i = 0; i < array->n_slots; i++) { for (i = 0; i < array->n_slots; i++) {
os_aio_slot_t* slot = os_aio_array_get_nth_slot(array, i); os_aio_slot_t* slot = os_aio_array_get_nth_slot(array, i);
os_event_free(slot->event); CloseHandle(slot->handle);
} }
#endif /* WIN_ASYNC_IO */ #endif /* WIN_ASYNC_IO */
#ifdef __WIN__ #ifdef __WIN__
ut_free(array->native_events); ut_free(array->handles);
#endif /* __WIN__ */ #endif /* __WIN__ */
os_mutex_free(array->mutex); os_mutex_free(array->mutex);
os_event_free(array->not_full); os_event_free(array->not_full);
...@@ -3174,7 +3075,8 @@ os_aio_array_wake_win_aio_at_shutdown( ...@@ -3174,7 +3075,8 @@ os_aio_array_wake_win_aio_at_shutdown(
for (i = 0; i < array->n_slots; i++) { for (i = 0; i < array->n_slots; i++) {
os_event_set((array->slots + i)->event); SetEvent(array->slots[i].handle);
} }
} }
#endif #endif
...@@ -3396,7 +3298,7 @@ os_aio_array_reserve_slot( ...@@ -3396,7 +3298,7 @@ os_aio_array_reserve_slot(
control = &(slot->control); control = &(slot->control);
control->Offset = (DWORD)offset; control->Offset = (DWORD)offset;
control->OffsetHigh = (DWORD)offset_high; control->OffsetHigh = (DWORD)offset_high;
os_event_reset(slot->event); ResetEvent(slot->handle);
#endif #endif
os_mutex_exit(array->mutex); os_mutex_exit(array->mutex);
...@@ -3433,7 +3335,7 @@ os_aio_array_free_slot( ...@@ -3433,7 +3335,7 @@ os_aio_array_free_slot(
} }
#ifdef WIN_ASYNC_IO #ifdef WIN_ASYNC_IO
os_event_reset(slot->event); ResetEvent(slot->handle);
#endif #endif
os_mutex_exit(array->mutex); os_mutex_exit(array->mutex);
} }
...@@ -3793,15 +3695,18 @@ os_aio_windows_handle( ...@@ -3793,15 +3695,18 @@ os_aio_windows_handle(
n = array->n_slots / array->n_segments; n = array->n_slots / array->n_segments;
if (array == os_aio_sync_array) { if (array == os_aio_sync_array) {
os_event_wait(os_aio_array_get_nth_slot(array, pos)->event); WaitForSingleObject(os_aio_array_get_nth_slot(array, pos)->handle,INFINITE);
i = pos; i = pos;
} else { } else {
srv_set_io_thread_op_info(orig_seg, "wait Windows aio"); srv_set_io_thread_op_info(orig_seg, "wait Windows aio");
i = os_event_wait_multiple(n, i = WaitForMultipleObjects((DWORD) n, array->handles + segment * n, FALSE, INFINITE);
(array->native_events)
+ segment * n);
} }
if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
os_thread_exit(NULL);
}
os_mutex_enter(array->mutex); os_mutex_enter(array->mutex);
slot = os_aio_array_get_nth_slot(array, i + segment * n); slot = os_aio_array_get_nth_slot(array, i + segment * n);
......
This diff is collapsed.
...@@ -136,6 +136,20 @@ UNIV_INTERN ulint srv_check_file_format_at_startup = DICT_TF_FORMAT_MAX; ...@@ -136,6 +136,20 @@ UNIV_INTERN ulint srv_check_file_format_at_startup = DICT_TF_FORMAT_MAX;
/** Place locks to records only i.e. do not use next-key locking except /** Place locks to records only i.e. do not use next-key locking except
on duplicate key checking and foreign key checking */ on duplicate key checking and foreign key checking */
UNIV_INTERN ibool srv_locks_unsafe_for_binlog = FALSE; UNIV_INTERN ibool srv_locks_unsafe_for_binlog = FALSE;
#ifdef __WIN__
/* Windows native condition variables. We use runtime loading / function
pointers, because they are not available on Windows Server 2003 and
Windows XP/2000.
We use condition for events on Windows if possible, even if os_event
resembles Windows kernel event object well API-wise. The reason is
performance, kernel objects are heavyweights and WaitForSingleObject() is a
performance killer causing calling thread to context switch. Besides, Innodb
is preallocating large number (often millions) of os_events. With kernel event
objects it takes a big chunk out of non-paged pool, which is better suited
for tasks like IO than for storing idle event objects. */
UNIV_INTERN ibool srv_use_native_conditions = FALSE;
#endif /* __WIN__ */
UNIV_INTERN ulint srv_n_data_files = 0; UNIV_INTERN ulint srv_n_data_files = 0;
UNIV_INTERN char** srv_data_file_names = NULL; UNIV_INTERN char** srv_data_file_names = NULL;
......
...@@ -1132,19 +1132,25 @@ innobase_start_or_create_for_mysql(void) ...@@ -1132,19 +1132,25 @@ innobase_start_or_create_for_mysql(void)
case OS_WIN95: case OS_WIN95:
case OS_WIN31: case OS_WIN31:
case OS_WINNT: case OS_WINNT:
/* On Win 95, 98, ME, Win32 subsystem for Windows 3.1, srv_use_native_conditions = FALSE;
and NT use simulated aio. In NT Windows provides async i/o,
but when run in conjunction with InnoDB Hot Backup, it seemed
to corrupt the data files. */
os_aio_use_native_aio = FALSE;
break; break;
default:
/* On Win 2000 and XP use async i/o */ case OS_WIN2000:
case OS_WINXP:
/* On 2000 and XP, async IO is available, but no condition variables. */
os_aio_use_native_aio = TRUE; os_aio_use_native_aio = TRUE;
break; srv_use_native_conditions = FALSE;
} break;
default:
/* On Win 2000 and XP use async i/o */
/* Vista and later have both async IO and condition variables */
os_aio_use_native_aio = TRUE;
srv_use_native_conditions = TRUE;
break;
}
#endif #endif
if (srv_file_flush_method_str == NULL) { if (srv_file_flush_method_str == NULL) {
/* These are the default options */ /* These are the default options */
......
...@@ -15,20 +15,10 @@ ...@@ -15,20 +15,10 @@
# This is the CMakeLists for InnoDB Plugin # This is the CMakeLists for InnoDB Plugin
# Starting at 5.1.38, MySQL CMake files are simplified. But the plugin
# CMakeLists.txt still needs to work with previous versions of MySQL.
IF (MYSQL_VERSION_ID GREATER "50137")
INCLUDE("${PROJECT_SOURCE_DIR}/storage/mysql_storage_engine.cmake")
ENDIF (MYSQL_VERSION_ID GREATER "50137")
IF (CMAKE_SIZEOF_VOID_P MATCHES 8) IF (CMAKE_SIZEOF_VOID_P MATCHES 8)
SET(WIN64 TRUE) SET(WIN64 TRUE)
ENDIF (CMAKE_SIZEOF_VOID_P MATCHES 8) ENDIF (CMAKE_SIZEOF_VOID_P MATCHES 8)
ADD_DEFINITIONS(-D_WIN32 -D_LIB -DMYSQL_SERVER)
# Include directories under xtradb # Include directories under xtradb
INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/storage/xtradb/include INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/storage/xtradb/include
${CMAKE_SOURCE_DIR}/storage/xtradb/handler) ${CMAKE_SOURCE_DIR}/storage/xtradb/handler)
...@@ -89,9 +79,6 @@ SET(XTRADB_SOURCES btr/btr0btr.c btr/btr0cur.c btr/btr0pcur.c btr/btr0sea.c ...@@ -89,9 +79,6 @@ SET(XTRADB_SOURCES btr/btr0btr.c btr/btr0cur.c btr/btr0pcur.c btr/btr0sea.c
usr/usr0sess.c usr/usr0sess.c
ut/ut0byte.c ut/ut0dbg.c ut/ut0mem.c ut/ut0rbt.c ut/ut0rnd.c ut/ut0ut.c ut/ut0vec.c ut/ut0byte.c ut/ut0dbg.c ut/ut0mem.c ut/ut0rbt.c ut/ut0rnd.c ut/ut0ut.c ut/ut0vec.c
ut/ut0list.c ut/ut0wqueue.c) ut/ut0list.c ut/ut0wqueue.c)
# Windows atomics do not perform well. Disable Windows atomics by default.
# See bug#52102 for details.
#ADD_DEFINITIONS(-DHAVE_WINDOWS_ATOMICS -DINNODB_RW_LOCKS_USE_ATOMICS -DHAVE_IB_PAUSE_INSTRUCTION)
ADD_DEFINITIONS(-DHAVE_IB_PAUSE_INSTRUCTION)
MYSQL_STORAGE_ENGINE(XTRADB) MYSQL_STORAGE_ENGINE(XTRADB)
...@@ -183,6 +183,10 @@ log. */ ...@@ -183,6 +183,10 @@ log. */
#define OS_WIN95 2 /*!< Microsoft Windows 95 */ #define OS_WIN95 2 /*!< Microsoft Windows 95 */
#define OS_WINNT 3 /*!< Microsoft Windows NT 3.x */ #define OS_WINNT 3 /*!< Microsoft Windows NT 3.x */
#define OS_WIN2000 4 /*!< Microsoft Windows 2000 */ #define OS_WIN2000 4 /*!< Microsoft Windows 2000 */
#define OS_WINXP 5 /*!< Microsoft Windows XP */
#define OS_WINVISTA 6 /*!< Microsoft Windows Vista */
#define OS_WIN7 7 /*!< Microsoft Windows 7 */
extern ulint os_n_file_reads; extern ulint os_n_file_reads;
extern ulint os_n_file_writes; extern ulint os_n_file_writes;
......
...@@ -37,29 +37,19 @@ Created 9/6/1995 Heikki Tuuri ...@@ -37,29 +37,19 @@ Created 9/6/1995 Heikki Tuuri
#include "univ.i" #include "univ.i"
#include "ut0lst.h" #include "ut0lst.h"
#ifdef __WIN__ #ifdef _WIN32
/** Native event (slow)*/
typedef HANDLE os_native_event_t;
/** Native mutex */ /** Native mutex */
#define os_fast_mutex_t CRITICAL_SECTION typedef CRITICAL_SECTION os_fast_mutex_t;
/** Native condition variable */
/** Native event */ typedef CONDITION_VARIABLE os_cond_t;
typedef HANDLE os_native_event_t;
/** Operating system event */
typedef struct os_event_struct os_event_struct_t;
/** Operating system event handle */
typedef os_event_struct_t* os_event_t;
/** An asynchronous signal sent between threads */
struct os_event_struct {
os_native_event_t handle;
/*!< Windows event */
UT_LIST_NODE_T(os_event_struct_t) os_event_list;
/*!< list of all created events */
};
#else #else
/** Native mutex */ /** Native mutex */
typedef pthread_mutex_t os_fast_mutex_t; typedef pthread_mutex_t os_fast_mutex_t;
/** Native condition variable */
typedef pthread_cond_t os_cond_t;
#endif
/** Operating system event */ /** Operating system event */
typedef struct os_event_struct os_event_struct_t; typedef struct os_event_struct os_event_struct_t;
...@@ -68,6 +58,9 @@ typedef os_event_struct_t* os_event_t; ...@@ -68,6 +58,9 @@ typedef os_event_struct_t* os_event_t;
/** An asynchronous signal sent between threads */ /** An asynchronous signal sent between threads */
struct os_event_struct { struct os_event_struct {
#ifdef _WIN32
HANDLE handle; /*!< kernel event object, slow, used on older Windows */
#endif
os_fast_mutex_t os_mutex; /*!< this mutex protects the next os_fast_mutex_t os_mutex; /*!< this mutex protects the next
fields */ fields */
ibool is_set; /*!< this is TRUE when the event is ibool is_set; /*!< this is TRUE when the event is
...@@ -76,12 +69,14 @@ struct os_event_struct { ...@@ -76,12 +69,14 @@ struct os_event_struct {
this event */ this event */
ib_int64_t signal_count; /*!< this is incremented each time ib_int64_t signal_count; /*!< this is incremented each time
the event becomes signaled */ the event becomes signaled */
pthread_cond_t cond_var; /*!< condition variable is used in os_cond_t cond_var; /*!< condition variable is used in
waiting for the event */ waiting for the event */
UT_LIST_NODE_T(os_event_struct_t) os_event_list; UT_LIST_NODE_T(os_event_struct_t) os_event_list;
/*!< list of all created events */ /*!< list of all created events */
}; };
#endif
/** Operating system mutex */ /** Operating system mutex */
typedef struct os_mutex_struct os_mutex_str_t; typedef struct os_mutex_struct os_mutex_str_t;
...@@ -186,33 +181,23 @@ os_event_wait_low( ...@@ -186,33 +181,23 @@ os_event_wait_low(
os_event_reset(). */ os_event_reset(). */
#define os_event_wait(event) os_event_wait_low(event, 0) #define os_event_wait(event) os_event_wait_low(event, 0)
#define os_event_wait_time(event, t) os_event_wait_time_low(event, t, 0)
/**********************************************************//** /**********************************************************//**
Waits for an event object until it is in the signaled state or Waits for an event object until it is in the signaled state or
a timeout is exceeded. a timeout is exceeded. In Unix the timeout is always infinite.
@return 0 if success, OS_SYNC_TIME_EXCEEDED if timeout was exceeded */ @return 0 if success, OS_SYNC_TIME_EXCEEDED if timeout was exceeded */
UNIV_INTERN UNIV_INTERN
ulint ulint
os_event_wait_time( os_event_wait_time_low(
/*===============*/
os_event_t event, /*!< in: event to wait */
ulint wtime); /*!< in: timeout in microseconds, or
OS_SYNC_INFINITE_TIME */
#ifdef __WIN__
/**********************************************************//**
Waits for any event in an OS native event array. Returns if even a single
one is signaled or becomes signaled.
@return index of the event which was signaled */
UNIV_INTERN
ulint
os_event_wait_multiple(
/*===================*/ /*===================*/
ulint n, /*!< in: number of events in the os_event_t event, /*!< in: event to wait */
array */ ulint time_in_usec, /*!< in: timeout in
os_native_event_t* native_event_array); microseconds, or
/*!< in: pointer to an array of event OS_SYNC_INFINITE_TIME */
handles */ ib_int64_t reset_sig_count); /*!< in: zero or the value
#endif returned by previous call of
os_event_reset(). */
/*********************************************************//** /*********************************************************//**
Creates an operating system mutex semaphore. Because these are slow, the Creates an operating system mutex semaphore. Because these are slow, the
mutex semaphore of InnoDB itself (mutex_t) should be used where possible. mutex semaphore of InnoDB itself (mutex_t) should be used where possible.
...@@ -385,7 +370,7 @@ Returns the old value of *ptr, atomically sets *ptr to new_val */ ...@@ -385,7 +370,7 @@ Returns the old value of *ptr, atomically sets *ptr to new_val */
# define os_atomic_test_and_set_byte(ptr, new_val) \ # define os_atomic_test_and_set_byte(ptr, new_val) \
atomic_swap_uchar(ptr, new_val) atomic_swap_uchar(ptr, new_val)
#elif defined(HAVE_WINDOWS_ATOMICS) #elif defined(_WIN32)
#define HAVE_ATOMIC_BUILTINS #define HAVE_ATOMIC_BUILTINS
......
...@@ -28,8 +28,7 @@ Created 9/6/1995 Heikki Tuuri ...@@ -28,8 +28,7 @@ Created 9/6/1995 Heikki Tuuri
#endif #endif
/**********************************************************//** /**********************************************************//**
Acquires ownership of a fast mutex. Currently in Windows this is the same Acquires ownership of a fast mutex.
as os_fast_mutex_lock!
@return 0 if success, != 0 if was reserved by another thread */ @return 0 if success, != 0 if was reserved by another thread */
UNIV_INLINE UNIV_INLINE
ulint ulint
...@@ -38,9 +37,9 @@ os_fast_mutex_trylock( ...@@ -38,9 +37,9 @@ os_fast_mutex_trylock(
os_fast_mutex_t* fast_mutex) /*!< in: mutex to acquire */ os_fast_mutex_t* fast_mutex) /*!< in: mutex to acquire */
{ {
#ifdef __WIN__ #ifdef __WIN__
EnterCriticalSection(fast_mutex); if (TryEnterCriticalSection(fast_mutex))
return 0;
return(0); return(1);
#else #else
/* NOTE that the MySQL my_pthread.h redefines pthread_mutex_trylock /* NOTE that the MySQL my_pthread.h redefines pthread_mutex_trylock
so that it returns 0 on success. In the operating system so that it returns 0 on success. In the operating system
......
...@@ -112,7 +112,9 @@ extern ulint srv_check_file_format_at_startup; ...@@ -112,7 +112,9 @@ extern ulint srv_check_file_format_at_startup;
on duplicate key checking and foreign key checking */ on duplicate key checking and foreign key checking */
extern ibool srv_locks_unsafe_for_binlog; extern ibool srv_locks_unsafe_for_binlog;
#endif /* !UNIV_HOTBACKUP */ #endif /* !UNIV_HOTBACKUP */
#ifdef __WIN__
/** Flag selecting Windows native condition variables; it is assigned during
server startup according to the detected Windows version. */
extern ibool srv_use_native_conditions;
#endif
extern ulint srv_n_data_files; extern ulint srv_n_data_files;
extern char** srv_data_file_names; extern char** srv_data_file_names;
extern ulint* srv_data_file_sizes; extern ulint* srv_data_file_sizes;
......
...@@ -45,7 +45,7 @@ Created 9/5/1995 Heikki Tuuri ...@@ -45,7 +45,7 @@ Created 9/5/1995 Heikki Tuuri
extern my_bool timed_mutexes; extern my_bool timed_mutexes;
#endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */ #endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */
#ifdef HAVE_WINDOWS_ATOMICS #ifdef _WIN32
typedef LONG lock_word_t; /*!< On Windows, InterlockedExchange operates typedef LONG lock_word_t; /*!< On Windows, InterlockedExchange operates
on LONG variable */ on LONG variable */
#else #else
......
...@@ -149,7 +149,7 @@ struct os_aio_slot_struct{ ...@@ -149,7 +149,7 @@ struct os_aio_slot_struct{
which pending aio operation was which pending aio operation was
completed */ completed */
#ifdef WIN_ASYNC_IO #ifdef WIN_ASYNC_IO
os_event_t event; /*!< event object we need in the HANDLE handle; /*!< handle object we need in the
OVERLAPPED struct */ OVERLAPPED struct */
OVERLAPPED control; /*!< Windows control block for the OVERLAPPED control; /*!< Windows control block for the
aio request */ aio request */
...@@ -183,7 +183,7 @@ struct os_aio_array_struct{ ...@@ -183,7 +183,7 @@ struct os_aio_array_struct{
aio array outside the ibuf segment */ aio array outside the ibuf segment */
os_aio_slot_t* slots; /*!< Pointer to the slots in the array */ os_aio_slot_t* slots; /*!< Pointer to the slots in the array */
#ifdef __WIN__ #ifdef __WIN__
os_native_event_t* native_events; HANDLE* handles;
/*!< Pointer to an array of OS native /*!< Pointer to an array of OS native
event handles where we copied the event handles where we copied the
handles from slots, in the same handles from slots, in the same
...@@ -270,10 +270,16 @@ os_get_os_version(void) ...@@ -270,10 +270,16 @@ os_get_os_version(void)
} else if (os_info.dwPlatformId == VER_PLATFORM_WIN32_WINDOWS) { } else if (os_info.dwPlatformId == VER_PLATFORM_WIN32_WINDOWS) {
return(OS_WIN95); return(OS_WIN95);
} else if (os_info.dwPlatformId == VER_PLATFORM_WIN32_NT) { } else if (os_info.dwPlatformId == VER_PLATFORM_WIN32_NT) {
if (os_info.dwMajorVersion <= 4) { switch(os_info.dwMajorVersion){
return(OS_WINNT); case 3:
} else { case 4:
return(OS_WIN2000); return OS_WINNT;
case 5:
return (os_info.dwMinorVersion == 0)?OS_WIN2000 : OS_WINXP;
case 6:
return (os_info.dwMinorVersion == 0)?OS_WINVISTA : OS_WIN7;
default:
return OS_WIN7;
} }
} else { } else {
ut_error; ut_error;
...@@ -2350,13 +2356,12 @@ _os_file_read( ...@@ -2350,13 +2356,12 @@ _os_file_read(
#ifdef __WIN__ #ifdef __WIN__
BOOL ret; BOOL ret;
DWORD len; DWORD len;
DWORD ret2;
DWORD low;
DWORD high;
ibool retry; ibool retry;
#ifndef UNIV_HOTBACKUP OVERLAPPED overlapped;
ulint i;
#endif /* !UNIV_HOTBACKUP */ memset(&overlapped, 0, sizeof(overlapped));
overlapped.Offset = (DWORD)offset;
overlapped.OffsetHigh = (DWORD)offset_high;
/* On 64-bit Windows, ulint is 64 bits. But offset and n should be /* On 64-bit Windows, ulint is 64 bits. But offset and n should be
no more than 32 bits. */ no more than 32 bits. */
...@@ -2371,40 +2376,11 @@ _os_file_read( ...@@ -2371,40 +2376,11 @@ _os_file_read(
ut_ad(buf); ut_ad(buf);
ut_ad(n > 0); ut_ad(n > 0);
low = (DWORD) offset;
high = (DWORD) offset_high;
os_mutex_enter(os_file_count_mutex); os_mutex_enter(os_file_count_mutex);
os_n_pending_reads++; os_n_pending_reads++;
os_mutex_exit(os_file_count_mutex); os_mutex_exit(os_file_count_mutex);
#ifndef UNIV_HOTBACKUP ret = ReadFile(file, buf, (DWORD) n, &len, &overlapped);
/* Protect the seek / read operation with a mutex */
i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
os_mutex_enter(os_file_seek_mutexes[i]);
#endif /* !UNIV_HOTBACKUP */
ret2 = SetFilePointer(file, low, &high, FILE_BEGIN);
if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) {
#ifndef UNIV_HOTBACKUP
os_mutex_exit(os_file_seek_mutexes[i]);
#endif /* !UNIV_HOTBACKUP */
os_mutex_enter(os_file_count_mutex);
os_n_pending_reads--;
os_mutex_exit(os_file_count_mutex);
goto error_handling;
}
ret = ReadFile(file, buf, (DWORD) n, &len, NULL);
#ifndef UNIV_HOTBACKUP
os_mutex_exit(os_file_seek_mutexes[i]);
#endif /* !UNIV_HOTBACKUP */
os_mutex_enter(os_file_count_mutex); os_mutex_enter(os_file_count_mutex);
os_n_pending_reads--; os_n_pending_reads--;
...@@ -2433,9 +2409,7 @@ _os_file_read( ...@@ -2433,9 +2409,7 @@ _os_file_read(
(ulong)n, (ulong)offset_high, (ulong)n, (ulong)offset_high,
(ulong)offset, (long)ret); (ulong)offset, (long)ret);
#endif /* __WIN__ */ #endif /* __WIN__ */
#ifdef __WIN__
error_handling:
#endif
retry = os_file_handle_error(NULL, "read"); retry = os_file_handle_error(NULL, "read");
if (retry) { if (retry) {
...@@ -2477,13 +2451,13 @@ os_file_read_no_error_handling( ...@@ -2477,13 +2451,13 @@ os_file_read_no_error_handling(
#ifdef __WIN__ #ifdef __WIN__
BOOL ret; BOOL ret;
DWORD len; DWORD len;
DWORD ret2;
DWORD low;
DWORD high;
ibool retry; ibool retry;
#ifndef UNIV_HOTBACKUP OVERLAPPED overlapped;
ulint i;
#endif /* !UNIV_HOTBACKUP */ memset(&overlapped, 0, sizeof(overlapped));
overlapped.Offset = (DWORD)offset;
overlapped.OffsetHigh = (DWORD)offset_high;
/* On 64-bit Windows, ulint is 64 bits. But offset and n should be /* On 64-bit Windows, ulint is 64 bits. But offset and n should be
no more than 32 bits. */ no more than 32 bits. */
...@@ -2498,40 +2472,11 @@ os_file_read_no_error_handling( ...@@ -2498,40 +2472,11 @@ os_file_read_no_error_handling(
ut_ad(buf); ut_ad(buf);
ut_ad(n > 0); ut_ad(n > 0);
low = (DWORD) offset;
high = (DWORD) offset_high;
os_mutex_enter(os_file_count_mutex); os_mutex_enter(os_file_count_mutex);
os_n_pending_reads++; os_n_pending_reads++;
os_mutex_exit(os_file_count_mutex); os_mutex_exit(os_file_count_mutex);
#ifndef UNIV_HOTBACKUP ret = ReadFile(file, buf, (DWORD) n, &len, &overlapped);
/* Protect the seek / read operation with a mutex */
i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
os_mutex_enter(os_file_seek_mutexes[i]);
#endif /* !UNIV_HOTBACKUP */
ret2 = SetFilePointer(file, low, &high, FILE_BEGIN);
if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) {
#ifndef UNIV_HOTBACKUP
os_mutex_exit(os_file_seek_mutexes[i]);
#endif /* !UNIV_HOTBACKUP */
os_mutex_enter(os_file_count_mutex);
os_n_pending_reads--;
os_mutex_exit(os_file_count_mutex);
goto error_handling;
}
ret = ReadFile(file, buf, (DWORD) n, &len, NULL);
#ifndef UNIV_HOTBACKUP
os_mutex_exit(os_file_seek_mutexes[i]);
#endif /* !UNIV_HOTBACKUP */
os_mutex_enter(os_file_count_mutex); os_mutex_enter(os_file_count_mutex);
os_n_pending_reads--; os_n_pending_reads--;
...@@ -2554,9 +2499,6 @@ os_file_read_no_error_handling( ...@@ -2554,9 +2499,6 @@ os_file_read_no_error_handling(
return(TRUE); return(TRUE);
} }
#endif /* __WIN__ */ #endif /* __WIN__ */
#ifdef __WIN__
error_handling:
#endif
retry = os_file_handle_error_no_exit(NULL, "read"); retry = os_file_handle_error_no_exit(NULL, "read");
if (retry) { if (retry) {
...@@ -2609,14 +2551,13 @@ os_file_write( ...@@ -2609,14 +2551,13 @@ os_file_write(
#ifdef __WIN__ #ifdef __WIN__
BOOL ret; BOOL ret;
DWORD len; DWORD len;
DWORD ret2;
DWORD low;
DWORD high;
ulint n_retries = 0; ulint n_retries = 0;
ulint err; ulint err;
#ifndef UNIV_HOTBACKUP OVERLAPPED overlapped;
ulint i;
#endif /* !UNIV_HOTBACKUP */ memset(&overlapped, 0, sizeof(overlapped));
overlapped.Offset = (DWORD)offset;
overlapped.OffsetHigh = (DWORD)offset_high;
/* On 64-bit Windows, ulint is 64 bits. But offset and n should be /* On 64-bit Windows, ulint is 64 bits. But offset and n should be
no more than 32 bits. */ no more than 32 bits. */
...@@ -2629,50 +2570,12 @@ os_file_write( ...@@ -2629,50 +2570,12 @@ os_file_write(
ut_ad(buf); ut_ad(buf);
ut_ad(n > 0); ut_ad(n > 0);
retry: retry:
low = (DWORD) offset;
high = (DWORD) offset_high;
os_mutex_enter(os_file_count_mutex); os_mutex_enter(os_file_count_mutex);
os_n_pending_writes++; os_n_pending_writes++;
os_mutex_exit(os_file_count_mutex); os_mutex_exit(os_file_count_mutex);
#ifndef UNIV_HOTBACKUP ret = WriteFile(file, buf, (DWORD) n, &len, &overlapped);
/* Protect the seek / write operation with a mutex */
i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
os_mutex_enter(os_file_seek_mutexes[i]);
#endif /* !UNIV_HOTBACKUP */
ret2 = SetFilePointer(file, low, &high, FILE_BEGIN);
if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) {
#ifndef UNIV_HOTBACKUP
os_mutex_exit(os_file_seek_mutexes[i]);
#endif /* !UNIV_HOTBACKUP */
os_mutex_enter(os_file_count_mutex);
os_n_pending_writes--;
os_mutex_exit(os_file_count_mutex);
ut_print_timestamp(stderr);
fprintf(stderr,
" InnoDB: Error: File pointer positioning to"
" file %s failed at\n"
"InnoDB: offset %lu %lu. Operating system"
" error number %lu.\n"
"InnoDB: Some operating system error numbers"
" are described at\n"
"InnoDB: "
REFMAN "operating-system-error-codes.html\n",
name, (ulong) offset_high, (ulong) offset,
(ulong) GetLastError());
return(FALSE);
}
ret = WriteFile(file, buf, (DWORD) n, &len, NULL);
/* Always do fsync to reduce the probability that when the OS crashes, /* Always do fsync to reduce the probability that when the OS crashes,
a database page is only partially physically written to disk. */ a database page is only partially physically written to disk. */
...@@ -2683,10 +2586,6 @@ os_file_write( ...@@ -2683,10 +2586,6 @@ os_file_write(
} }
# endif /* UNIV_DO_FLUSH */ # endif /* UNIV_DO_FLUSH */
#ifndef UNIV_HOTBACKUP
os_mutex_exit(os_file_seek_mutexes[i]);
#endif /* !UNIV_HOTBACKUP */
os_mutex_enter(os_file_count_mutex); os_mutex_enter(os_file_count_mutex);
os_n_pending_writes--; os_n_pending_writes--;
os_mutex_exit(os_file_count_mutex); os_mutex_exit(os_file_count_mutex);
...@@ -3090,7 +2989,7 @@ os_aio_array_create( ...@@ -3090,7 +2989,7 @@ os_aio_array_create(
array->n_reserved = 0; array->n_reserved = 0;
array->slots = ut_malloc(n * sizeof(os_aio_slot_t)); array->slots = ut_malloc(n * sizeof(os_aio_slot_t));
#ifdef __WIN__ #ifdef __WIN__
array->native_events = ut_malloc(n * sizeof(os_native_event_t)); array->handles = ut_malloc(n * sizeof(HANDLE));
#endif #endif
for (i = 0; i < n; i++) { for (i = 0; i < n; i++) {
slot = os_aio_array_get_nth_slot(array, i); slot = os_aio_array_get_nth_slot(array, i);
...@@ -3098,13 +2997,14 @@ os_aio_array_create( ...@@ -3098,13 +2997,14 @@ os_aio_array_create(
slot->pos = i; slot->pos = i;
slot->reserved = FALSE; slot->reserved = FALSE;
#ifdef WIN_ASYNC_IO #ifdef WIN_ASYNC_IO
slot->event = os_event_create(NULL); slot->handle= CreateEvent(NULL,TRUE, FALSE, NULL);
over = &(slot->control); over = &(slot->control);
over->hEvent = slot->event->handle; over->hEvent = slot->handle;
*((array->native_events) + i) = over->hEvent; *((array->handles) + i) = over->hEvent;
#endif #endif
} }
...@@ -3124,12 +3024,12 @@ os_aio_array_free( ...@@ -3124,12 +3024,12 @@ os_aio_array_free(
for (i = 0; i < array->n_slots; i++) { for (i = 0; i < array->n_slots; i++) {
os_aio_slot_t* slot = os_aio_array_get_nth_slot(array, i); os_aio_slot_t* slot = os_aio_array_get_nth_slot(array, i);
os_event_free(slot->event); CloseHandle(slot->handle);
} }
#endif /* WIN_ASYNC_IO */ #endif /* WIN_ASYNC_IO */
#ifdef __WIN__ #ifdef __WIN__
ut_free(array->native_events); ut_free(array->handles);
#endif /* __WIN__ */ #endif /* __WIN__ */
os_mutex_free(array->mutex); os_mutex_free(array->mutex);
os_event_free(array->not_full); os_event_free(array->not_full);
...@@ -3255,7 +3155,8 @@ os_aio_array_wake_win_aio_at_shutdown( ...@@ -3255,7 +3155,8 @@ os_aio_array_wake_win_aio_at_shutdown(
for (i = 0; i < array->n_slots; i++) { for (i = 0; i < array->n_slots; i++) {
os_event_set((array->slots + i)->event); SetEvent(array->slots[i].handle);
} }
} }
#endif #endif
...@@ -3480,7 +3381,7 @@ os_aio_array_reserve_slot( ...@@ -3480,7 +3381,7 @@ os_aio_array_reserve_slot(
control = &(slot->control); control = &(slot->control);
control->Offset = (DWORD)offset; control->Offset = (DWORD)offset;
control->OffsetHigh = (DWORD)offset_high; control->OffsetHigh = (DWORD)offset_high;
os_event_reset(slot->event); ResetEvent(slot->handle);
#endif #endif
os_mutex_exit(array->mutex); os_mutex_exit(array->mutex);
...@@ -3518,7 +3419,7 @@ os_aio_array_free_slot( ...@@ -3518,7 +3419,7 @@ os_aio_array_free_slot(
} }
#ifdef WIN_ASYNC_IO #ifdef WIN_ASYNC_IO
os_event_reset(slot->event); ResetEvent(slot->handle);
#endif #endif
os_mutex_exit(array->mutex); os_mutex_exit(array->mutex);
} }
...@@ -3906,15 +3807,18 @@ os_aio_windows_handle( ...@@ -3906,15 +3807,18 @@ os_aio_windows_handle(
n = array->n_slots; n = array->n_slots;
if (array == os_aio_sync_array) { if (array == os_aio_sync_array) {
os_event_wait(os_aio_array_get_nth_slot(array, pos)->event); WaitForSingleObject(os_aio_array_get_nth_slot(array, pos)->handle,INFINITE);
i = pos; i = pos;
} else { } else {
srv_set_io_thread_op_info(orig_seg, "wait Windows aio"); srv_set_io_thread_op_info(orig_seg, "wait Windows aio");
i = os_event_wait_multiple(n, i = WaitForMultipleObjects((DWORD) n, array->handles + segment * n, FALSE, INFINITE);
(array->native_events)
);
} }
if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
os_thread_exit(NULL);
}
os_mutex_enter(array->mutex); os_mutex_enter(array->mutex);
slot = os_aio_array_get_nth_slot(array, i); slot = os_aio_array_get_nth_slot(array, i);
......
This diff is collapsed.
...@@ -139,6 +139,20 @@ UNIV_INTERN ulint srv_check_file_format_at_startup = DICT_TF_FORMAT_MAX; ...@@ -139,6 +139,20 @@ UNIV_INTERN ulint srv_check_file_format_at_startup = DICT_TF_FORMAT_MAX;
/** Place locks to records only i.e. do not use next-key locking except /** Place locks to records only i.e. do not use next-key locking except
on duplicate key checking and foreign key checking */ on duplicate key checking and foreign key checking */
UNIV_INTERN ibool srv_locks_unsafe_for_binlog = FALSE; UNIV_INTERN ibool srv_locks_unsafe_for_binlog = FALSE;
#ifdef __WIN__
/* Windows native condition variables. They are accessed through runtime
loading / function pointers, because they are not available on Windows
Server 2003 and Windows XP/2000.
We use condition variables for events on Windows whenever possible, even
though os_event resembles the Windows kernel event object well API-wise.
The reason is performance: kernel objects are heavyweight, and
WaitForSingleObject() is a performance killer that makes the calling thread
context switch. Besides, InnoDB preallocates a large number (often millions)
of os_events. With kernel event objects that takes a big chunk out of the
non-paged pool, which is better used for tasks like IO than for storing idle
event objects. */
UNIV_INTERN ibool srv_use_native_conditions = FALSE;
#endif /* __WIN__ */
UNIV_INTERN ulint srv_n_data_files = 0; UNIV_INTERN ulint srv_n_data_files = 0;
UNIV_INTERN char** srv_data_file_names = NULL; UNIV_INTERN char** srv_data_file_names = NULL;
......
...@@ -1265,23 +1265,23 @@ innobase_start_or_create_for_mysql(void) ...@@ -1265,23 +1265,23 @@ innobase_start_or_create_for_mysql(void)
case OS_WIN95: case OS_WIN95:
case OS_WIN31: case OS_WIN31:
case OS_WINNT: case OS_WINNT:
/* On Win 95, 98, ME, Win32 subsystem for Windows 3.1, srv_use_native_conditions = FALSE;
and NT use simulated aio. In NT Windows provides async i/o,
but when run in conjunction with InnoDB Hot Backup, it seemed
to corrupt the data files. */
os_aio_use_native_aio = FALSE;
break; break;
default:
case OS_WIN2000:
case OS_WINXP:
/* On 2000 and XP, async IO is available, but no condition variables. */
os_aio_use_native_aio = TRUE;
srv_use_native_conditions = FALSE;
break;
default:
/* On Win 2000 and XP use async i/o */ /* On Win 2000 and XP use async i/o */
//os_aio_use_native_aio = TRUE; /* Vista and later have both async IO and condition variables */
os_aio_use_native_aio = FALSE; os_aio_use_native_aio = TRUE;
fprintf(stderr, srv_use_native_conditions = TRUE;
"InnoDB: Windows native async i/o is disabled as default.\n" break;
"InnoDB: It is not applicable for the current" }
" multi io threads implementation.\n");
break;
}
#endif #endif
if (srv_file_flush_method_str == NULL) { if (srv_file_flush_method_str == NULL) {
/* These are the default options */ /* These are the default options */
...@@ -1289,6 +1289,10 @@ innobase_start_or_create_for_mysql(void) ...@@ -1289,6 +1289,10 @@ innobase_start_or_create_for_mysql(void)
srv_unix_file_flush_method = SRV_UNIX_FSYNC; srv_unix_file_flush_method = SRV_UNIX_FSYNC;
srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED; srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
#ifdef __WIN__
srv_n_read_io_threads = srv_n_write_io_threads = 1;
#endif
#ifndef __WIN__ #ifndef __WIN__
} else if (0 == ut_strcmp(srv_file_flush_method_str, "fsync")) { } else if (0 == ut_strcmp(srv_file_flush_method_str, "fsync")) {
srv_unix_file_flush_method = SRV_UNIX_FSYNC; srv_unix_file_flush_method = SRV_UNIX_FSYNC;
...@@ -1315,16 +1319,7 @@ innobase_start_or_create_for_mysql(void) ...@@ -1315,16 +1319,7 @@ innobase_start_or_create_for_mysql(void)
} else if (0 == ut_strcmp(srv_file_flush_method_str, "unbuffered")) { } else if (0 == ut_strcmp(srv_file_flush_method_str, "unbuffered")) {
srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED; srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
os_aio_use_native_aio = FALSE; os_aio_use_native_aio = FALSE;
}
} else if (0 == ut_strcmp(srv_file_flush_method_str,
"async_unbuffered")) {
srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
os_aio_use_native_aio = TRUE;
srv_n_read_io_threads = srv_n_write_io_threads = 1;
fprintf(stderr,
"InnoDB: 'async_unbuffered' was detected as innodb_flush_method.\n"
"InnoDB: Windows native async i/o is enabled.\n"
"InnoDB: And io threads are restricted.\n");
#endif #endif
} else { } else {
fprintf(stderr, fprintf(stderr,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment