Commit 0cd2b4c2 authored by Eugene Kosov

MDEV-22177 more fsync() -> fdatasync() in InnoDB

Replace all fsync() with fdatasync() when possible (e.g. on Linux).

InnoDB doesn't care about file timestamps, so to achieve better
performance it makes sense to use fdatasync() everywhere.

file_io::flush(): renamed from flush_data_only()

os_file_flush_data(): removed

os_file_sync_posix(): renamed from os_file_fsync_posix(). Now it uses
fdatasync() when it's available.
parent 9bd98f45
......@@ -18566,7 +18566,7 @@ checkpoint_now_set(THD*, st_mysql_sys_var*, void*, const void* save)
+ SIZE_OF_FILE_CHECKPOINT
< log_sys.get_lsn()) {
log_make_checkpoint();
log_sys.log.flush_data_only();
log_sys.log.flush();
}
dberr_t err = fil_write_flushed_lsn(log_sys.get_lsn());
......
......@@ -442,9 +442,9 @@ class file_io
virtual dberr_t read(os_offset_t offset, span<byte> buf) noexcept= 0;
virtual dberr_t write(const char *path, os_offset_t offset,
span<const byte> buf) noexcept= 0;
virtual dberr_t flush_data_only() noexcept= 0;
virtual dberr_t flush() noexcept= 0;
/** Durable writes doesn't require calling flush_data_only() */
/** Durable writes doesn't require calling flush() */
bool writes_are_durable() const noexcept { return m_durable_writes; }
protected:
......@@ -468,7 +468,7 @@ class file_os_io final: public file_io
dberr_t read(os_offset_t offset, span<byte> buf) noexcept final;
dberr_t write(const char *path, os_offset_t offset,
span<const byte> buf) noexcept final;
dberr_t flush_data_only() noexcept final;
dberr_t flush() noexcept final;
private:
pfs_os_file_t m_fd{OS_FILE_CLOSED};
......@@ -490,7 +490,7 @@ class log_file_t
dberr_t read(os_offset_t offset, span<byte> buf) noexcept;
bool writes_are_durable() const noexcept;
dberr_t write(os_offset_t offset, span<const byte> buf) noexcept;
dberr_t flush_data_only() noexcept;
dberr_t flush() noexcept;
private:
std::unique_ptr<file_io> m_file;
......@@ -596,14 +596,14 @@ struct log_t{
@param[in] offset offset in log file
@param[in] buf buffer where to read */
void read(os_offset_t offset, span<byte> buf);
/** Tells whether writes require calling flush_data_only() */
/** Tells whether writes require calling flush() */
bool writes_are_durable() const noexcept;
/** writes buffer to log file
@param[in] offset offset in log file
@param[in] buf buffer from which to write */
void write(os_offset_t offset, span<byte> buf);
/** flushes OS page cache (excluding metadata!) for log file */
void flush_data_only();
void flush();
/** closes log file */
void close_file();
......
......@@ -744,9 +744,6 @@ The wrapper functions have the prefix of "innodb_". */
# define os_file_flush(file) \
pfs_os_file_flush_func(file, __FILE__, __LINE__)
#define os_file_flush_data(file) \
pfs_os_file_flush_data_func(file, __FILE__, __LINE__)
# define os_file_rename(key, oldpath, newpath) \
pfs_os_file_rename_func(key, oldpath, newpath, __FILE__, __LINE__)
......@@ -992,17 +989,6 @@ pfs_os_file_flush_func(
const char* src_file,
uint src_line);
/** NOTE! Please use the corresponding macro os_file_flush_data(), not directly
this function!
This is the performance schema instrumented wrapper function for
os_file_flush_data() which flushes only(!) data (excluding metadata) from OS
page cache of a given file to the disk.
@param[in] file Open file handle
@param[in] src_file file name where func invoked
@param[in] src_line line where the func invoked
@return true if success */
bool pfs_os_file_flush_data_func(pfs_os_file_t file, const char *src_file,
uint src_line);
/** NOTE! Please use the corresponding macro os_file_rename(), not directly
this function!
......@@ -1098,8 +1084,6 @@ to original un-instrumented file I/O APIs */
# define os_file_flush(file) os_file_flush_func(file)
#define os_file_flush_data(file) os_file_flush_data_func(file)
# define os_file_rename(key, oldpath, newpath) \
os_file_rename_func(oldpath, newpath)
......@@ -1179,14 +1163,6 @@ bool
os_file_flush_func(
os_file_t file);
/** NOTE! Use the corresponding macro os_file_flush_data(), not directly this
function!
Flushes only(!) data (excluding metadata) from OS page cache of a given file to
the disk.
@param[in] file handle to a file
@return true if success */
bool os_file_flush_data_func(os_file_t file);
/** Retrieves the last error number if an error occurs in a file io function.
The number should be retrieved before any other OS calls (because they may
overwrite the error number). If the number is not known to this program,
......
......@@ -633,9 +633,9 @@ dberr_t file_os_io::write(const char *path, os_offset_t offset,
buf.size());
}
dberr_t file_os_io::flush_data_only() noexcept
dberr_t file_os_io::flush() noexcept
{
return os_file_flush_data(m_fd) ? DB_SUCCESS : DB_ERROR;
return os_file_flush(m_fd) ? DB_SUCCESS : DB_ERROR;
}
#ifdef HAVE_PMEM
......@@ -674,7 +674,7 @@ class file_pmem_io final : public file_io
pmem_memcpy_persist(m_file.data() + offset, buf.data(), buf.size());
return DB_SUCCESS;
}
dberr_t flush_data_only() noexcept final
dberr_t flush() noexcept final
{
ut_ad(0);
return DB_SUCCESS;
......@@ -746,10 +746,10 @@ dberr_t log_file_t::write(os_offset_t offset, span<const byte> buf) noexcept
return m_file->write(m_path.c_str(), offset, buf);
}
dberr_t log_file_t::flush_data_only() noexcept
dberr_t log_file_t::flush() noexcept
{
ut_ad(is_opened());
return m_file->flush_data_only();
return m_file->flush();
}
void log_t::file::open_file(std::string path)
......@@ -788,7 +788,7 @@ void log_t::file::write_header_durable(lsn_t lsn)
log_sys.log.write(0, buf);
if (!log_sys.log.writes_are_durable())
log_sys.log.flush_data_only();
log_sys.log.flush();
}
void log_t::file::read(os_offset_t offset, span<byte> buf)
......@@ -813,11 +813,11 @@ void log_t::file::write(os_offset_t offset, span<byte> buf)
log_sys.n_log_ios++;
}
void log_t::file::flush_data_only()
void log_t::file::flush()
{
log_sys.pending_flushes.fetch_add(1, std::memory_order_acquire);
if (const dberr_t err= fd.flush_data_only())
ib::fatal() << "flush_data_only(" << fd.get_path() << ") returned " << err;
if (const dberr_t err= fd.flush())
ib::fatal() << "flush(" << fd.get_path() << ") returned " << err;
log_sys.pending_flushes.fetch_sub(1, std::memory_order_release);
log_sys.flushes.fetch_add(1, std::memory_order_release);
}
......@@ -936,7 +936,7 @@ log_write_buf(
and invoke log_mutex_enter(). */
static void log_write_flush_to_disk_low(lsn_t lsn)
{
log_sys.log.flush_data_only();
log_sys.log.flush();
ut_a(lsn >= log_sys.get_flushed_lsn());
log_sys.set_flushed_lsn(lsn);
}
......@@ -1294,7 +1294,7 @@ void log_write_checkpoint_info(lsn_t end_lsn)
: LOG_CHECKPOINT_1,
{buf, OS_FILE_LOG_BLOCK_SIZE});
log_sys.log.flush_data_only();
log_sys.log.flush();
log_mutex_enter();
......@@ -1742,7 +1742,7 @@ logs_empty_and_mark_files_at_shutdown(void)
/* Ensure that all buffered changes are written to the
redo log before fil_close_all_files(). */
log_sys.log.flush_data_only();
log_sys.log.flush();
} else {
lsn = recv_sys.recovered_lsn;
}
......
......@@ -78,6 +78,9 @@ Created 10/21/1995 Heikki Tuuri
#include <my_sys.h>
#endif
#include <thread>
#include <chrono>
/* Per-IO operation environment*/
class io_slots
{
......@@ -879,55 +882,53 @@ os_file_get_last_error_low(
return(OS_FILE_ERROR_MAX + err);
}
/** Wrapper to fsync(2) that retries the call on some errors.
/** Wrapper to fsync() or fdatasync() that retries the call on some errors.
Returns the value 0 if successful; otherwise the value -1 is returned and
the global variable errno is set to indicate the error.
@param[in] file open file handle
@return 0 if success, -1 otherwise */
static
int
os_file_fsync_posix(
os_file_t file)
static int os_file_sync_posix(os_file_t file)
{
ulint failures = 0;
for (;;) {
++os_n_fsyncs;
int ret = fsync(file);
#if !defined(HAVE_FDATASYNC) || HAVE_DECL_FDATASYNC == 0
auto func= fsync;
auto func_name= "fsync()";
#else
auto func= fdatasync;
auto func_name= "fdatasync()";
#endif
if (ret == 0) {
return(ret);
}
ulint failures= 0;
switch(errno) {
case ENOLCK:
for (;;)
{
++os_n_fsyncs;
++failures;
ut_a(failures < 1000);
int ret= func(file);
if (!(failures % 100)) {
if (ret == 0)
return ret;
ib::warn()
<< "fsync(): "
<< "No locks available; retrying";
}
switch (errno)
{
case ENOLCK:
++failures;
ut_a(failures < 1000);
/* 0.2 sec */
os_thread_sleep(200000);
break;
if (!(failures % 100))
ib::warn() << func_name << ": No locks available; retrying";
case EINTR:
std::this_thread::sleep_for(std::chrono::milliseconds(200));
break;
++failures;
ut_a(failures < 2000);
break;
case EINTR:
++failures;
ut_a(failures < 2000);
break;
default:
ib::fatal() << "fsync() returned " << errno;
}
}
default:
ib::fatal() << func_name << " returned " << errno;
}
}
}
/** Check the existence and type of the given file.
......@@ -988,7 +989,7 @@ os_file_flush_func(
int ret;
WAIT_ALLOW_WRITES();
ret = os_file_fsync_posix(file);
ret = os_file_sync_posix(file);
if (ret == 0) {
return(true);
......@@ -4604,32 +4605,3 @@ os_normalize_path(
}
}
}
/** Flushes only the data (not the metadata) of a file from the OS page
cache to disk.
@param[in]	file	handle to an open file
@return true if the flush succeeded, false otherwise */
bool os_file_flush_data_func(os_file_t file) {
/* On Windows, or when fdatasync() is not available/declared, delegate to
the ordinary flush routine. */
#if defined(_WIN32) || !defined(HAVE_FDATASYNC) || HAVE_DECL_FDATASYNC == 0
return os_file_flush_func(file);
#else
/* A single fdatasync() call: -1 signals failure, anything else success. */
bool success= fdatasync(file) != -1;
if (!success) {
/* Failure is only logged here; the caller decides how to react to the
false return value. */
ib::error() << "fdatasync() errno: " << errno;
}
return success;
#endif
}
#ifdef UNIV_PFS_IO
/** Performance schema instrumented wrapper around os_file_flush_data_func().
Brackets the flush with register_pfs_file_io_begin()/register_pfs_file_io_end()
using the PSI_FILE_SYNC operation type.
@param[in]	file		open file handle
@param[in]	src_file	file name where the function was invoked
@param[in]	src_line	line where the function was invoked
@return true if the flush succeeded, false otherwise */
bool pfs_os_file_flush_data_func(pfs_os_file_t file, const char *src_file,
uint src_line)
{
PSI_file_locker_state state;
struct PSI_file_locker *locker= NULL;
/* Start the instrumented section; a byte count of 0 is passed for the
sync operation. */
register_pfs_file_io_begin(&state, locker, file, 0, PSI_FILE_SYNC, src_file,
src_line);
/* The actual flush is done by the un-instrumented function. */
bool success= os_file_flush_data_func(file);
/* End the instrumented section, again with a byte count of 0. */
register_pfs_file_io_end(locker, 0);
return success;
}
#endif
......@@ -1033,7 +1033,7 @@ static lsn_t srv_prepare_to_delete_redo_log_file(bool old_exists)
if (flushed_lsn != log_sys.get_flushed_lsn()) {
log_write_up_to(flushed_lsn, false);
log_sys.log.flush_data_only();
log_sys.log.flush();
}
ut_ad(flushed_lsn == log_get_lsn());
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment