#include <toku_portability.h> #include <stdio.h> #include <toku_assert.h> #include <stdint.h> #include <unistd.h> #include <windows.h> #include <toku_atomic.h> #include <toku_time.h> int64_t pread(int fildes, void *buf, size_t nbyte, int64_t offset) { HANDLE filehandle; OVERLAPPED win_offset = {0}; filehandle = (HANDLE)_get_osfhandle(fildes); int64_t r; if (filehandle==INVALID_HANDLE_VALUE) { r = errno; assert(r!=0); goto cleanup; } win_offset.Offset = offset % (1LL<<32LL); win_offset.OffsetHigh = offset / (1LL<<32LL); DWORD bytes_read; r = ReadFile(filehandle, buf, nbyte, &bytes_read, &win_offset); if (!r) { r = GetLastError(); if (r==ERROR_HANDLE_EOF) r = bytes_read; else { errno = r; r = -1; } } else r = bytes_read; // printf("%s: %d %p %u %I64d %I64d\n", __FUNCTION__, fildes, buf, nbyte, offset, r); fflush(stdout); cleanup: return r; } int64_t pwrite(int fildes, const void *buf, size_t nbyte, int64_t offset) { HANDLE filehandle; OVERLAPPED win_offset = {0}; filehandle = (HANDLE)_get_osfhandle(fildes); int64_t r; if (filehandle==INVALID_HANDLE_VALUE) { r = -1; assert(errno!=0); goto cleanup; } win_offset.Offset = offset % (1LL<<32LL); win_offset.OffsetHigh = offset / (1LL<<32LL); DWORD bytes_written; r = WriteFile(filehandle, buf, nbyte, &bytes_written, &win_offset); if (!r) { errno = GetLastError(); if (errno == ERROR_HANDLE_DISK_FULL || errno == ERROR_DISK_FULL) { errno = ENOSPC; } r = -1; } else r = bytes_written; // printf("%s: %d %p %u %I64d %I64d\n", __FUNCTION__, fildes, buf, nbyte, offset, r); fflush(stdout); cleanup: return r; } int fsync(int fd) { int r = _commit(fd); return r; } int ftruncate(int fd, int64_t offset) { int r = _chsize_s(fd, offset); return r; } static ssize_t (*t_pwrite)(int, const void *, size_t, toku_off_t) = 0; static ssize_t (*t_write)(int, const void *, size_t) = 0; int toku_set_func_pwrite (ssize_t (*pwrite_fun)(int, const void *, size_t, toku_off_t)) { t_pwrite = pwrite_fun; return 0; } int toku_set_func_write (ssize_t (*write_fun)(int, const void *, size_t)) { t_write = write_fun; t_write = t_write; //So far unused return 0; } //Print any necessary errors //Return whether we should try the write again. static void try_again_after_handling_write_error(int fd, size_t len, ssize_t r_write) { int try_again = 0; assert(r_write < 0); int errno_write = errno; assert(errno_write != 0); switch (errno_write) { case EINTR: { //The call was interrupted by a signal before any data was written; see signal(7). char err_msg[sizeof("Write of [] bytes to fd=[] interrupted. Retrying.") + 20+10]; //64 bit is 20 chars, 32 bit is 10 chars snprintf(err_msg, sizeof(err_msg), "Write of [%"PRIu64"] bytes to fd=[%d] interrupted. Retrying.", (uint64_t)len, fd); perror(err_msg); fflush(stderr); try_again = 1; break; } case ENOSPC: { char err_msg[sizeof("Failed write of [] bytes to fd=[].") + 20+10]; //64 bit is 20 chars, 32 bit is 10 chars snprintf(err_msg, sizeof(err_msg), "Failed write of [%"PRIu64"] bytes to fd=[%d].", (uint64_t)len, fd); perror(err_msg); fflush(stderr); int out_of_disk_space = 1; assert(!out_of_disk_space); //Give an error message that might be useful if this is the only one that survives. } default: break; } assert(try_again); errno = errno_write; } void toku_os_full_pwrite (int fd, const void *org_buf, size_t len, toku_off_t off) { const uint8_t *buf = org_buf; while (len > 0) { ssize_t r; if (t_pwrite) { r = t_pwrite(fd, buf, len, off); } else { r = pwrite(fd, buf, len, off); } if (r > 0) { len -= r; buf += r; off += r; } else { try_again_after_handling_write_error(fd, len, r); } } assert(len == 0); } /* { ssize_t r; if (t_pwrite) { r = t_pwrite(fd, buf, len, off); } else { r = pwrite(fd, buf, len, off); } if (r==-1 && errno==ENOSPC) { char err_msg[sizeof("Failed write of [] bytes to fd=[].") + 20+10]; //64 bit is 20 chars, 32 bit is 10 chars snprintf(err_msg, sizeof(err_msg), "Failed write of [%"PRIu64"] bytes to fd=[%d].", len, fd); perror(err_msg); fflush(stderr); int out_of_disk_space = 1; assert(!out_of_disk_space); //Give an error message that might be useful if this is the only one that survives. } assert(r==len); } */ void toku_os_full_write (int fd, const void *org_buf, size_t len) { const uint8_t *buf = org_buf; while (len > 0) { ssize_t r; if (t_write) { r = t_write(fd, buf, len); } else { r = write(fd, buf, len); } if (r > 0) { len -= r; buf += r; } else { try_again_after_handling_write_error(fd, len, r); } } assert(len == 0); } // t_fsync exists for testing purposes only static int (*t_fsync)(int) = 0; static uint64_t toku_fsync_count; static uint64_t toku_fsync_time; #if !TOKU_WINDOWS_HAS_ATOMIC_64 static toku_pthread_mutex_t fsync_lock; #endif int toku_fsync_init(void) { int r = 0; #if !TOKU_WINDOWS_HAS_ATOMIC_64 r = toku_pthread_mutex_init(&fsync_lock, NULL); assert(r == 0); #endif return r; } int toku_fsync_destroy(void) { int r = 0; #if !TOKU_WINDOWS_HAS_ATOMIC_64 r = toku_pthread_mutex_destroy(&fsync_lock); assert(r == 0); #endif return r; } int toku_set_func_fsync(int (*fsync_function)(int)) { t_fsync = fsync_function; return 0; } static uint64_t get_tnow(void) { struct timeval tv; int r = gettimeofday(&tv, NULL); assert(r == 0); return tv.tv_sec * 1000000ULL + tv.tv_usec; } // keep trying if fsync fails because of EINTR int toku_file_fsync(int fd) { int r = -1; uint64_t tstart = get_tnow(); while (r != 0) { if (t_fsync) r = t_fsync(fd); else r = fsync(fd); if (r) assert(errno==EINTR); } #if TOKU_WINDOWS_HAS_ATOMIC_64 toku_sync_fetch_and_increment_uint64(&toku_fsync_count); toku_sync_fetch_and_add_uint64(&toku_fsync_time, get_tnow() - tstart); #else //These two need to be fully 64 bit and atomic. //The windows atomic add 64 bit is not available. //toku_sync_fetch_and_add_uint64 (and increment) treat it as 32 bit, and //would overflow. //Even on 32 bit machines, aligned 64 bit writes/writes are atomic, so we just //need to make sure there's only one writer for these two variables. //Protect with a mutex. Fsync is rare/slow enough that this should be ok. int r_mutex; r_mutex = toku_pthread_mutex_lock(&fsync_lock); assert(r_mutex == 0); toku_fsync_count++; toku_fsync_time += get_tnow() - tstart; r_mutex = toku_pthread_mutex_unlock(&fsync_lock); assert(r_mutex == 0); #endif return r; } void toku_get_fsync_times(uint64_t *fsync_count, uint64_t *fsync_time) { *fsync_count = toku_fsync_count; *fsync_time = toku_fsync_time; }