Commit 783625d7 authored by Marko Mäkelä's avatar Marko Mäkelä

MDEV-24883 add io_uring support for tpool

liburing is a new optional dependency (WITH_URING=auto|yes|no)
that replaces libaio when it is available.

aio_uring: class which wraps io_uring stuff

aio_uring::bind()/unbind(): optional optimization

aio_uring::submit_io(): a mutex prevents data races, because liburing
calls are not thread-safe. If you look into its implementation you will
see atomic operations, but they are used only for synchronization between
the kernel and user space. That is why our own synchronization is still needed.

For systemd, we add LimitMEMLOCK=524288 (ulimit -l 524288)
because the io_uring_setup system call that is invoked
by io_uring_queue_init() requests locked memory. The value
was found empirically; with 262144, we would occasionally
fail to enable io_uring when using the maximum values of
innodb_read_io_threads=64 and innodb_write_io_threads=64.

aio_uring::thread_routine(): Tolerate -EINTR return from
io_uring_wait_cqe(), because it may occur on shutdown
on Ubuntu 20.10 (Groovy Gorilla).

This was mostly implemented by Eugene Kosov. Systemd integration
and improved startup/shutdown error handling by Marko Mäkelä.
parent 3dfda087
...@@ -174,6 +174,7 @@ INCLUDE(mysql_add_executable) ...@@ -174,6 +174,7 @@ INCLUDE(mysql_add_executable)
INCLUDE(symlinks) INCLUDE(symlinks)
INCLUDE(compile_flags) INCLUDE(compile_flags)
INCLUDE(pmem) INCLUDE(pmem)
INCLUDE(uring)
# Handle options # Handle options
OPTION(DISABLE_SHARED OPTION(DISABLE_SHARED
...@@ -394,7 +395,7 @@ MYSQL_CHECK_READLINE() ...@@ -394,7 +395,7 @@ MYSQL_CHECK_READLINE()
SET(MALLOC_LIBRARY "system") SET(MALLOC_LIBRARY "system")
CHECK_PCRE() CHECK_PCRE()
CHECK_URING()
CHECK_SYSTEMD() CHECK_SYSTEMD()
IF(CMAKE_CROSSCOMPILING) IF(CMAKE_CROSSCOMPILING)
......
...@@ -49,6 +49,10 @@ MACRO(CHECK_SYSTEMD) ...@@ -49,6 +49,10 @@ MACRO(CHECK_SYSTEMD)
SET(SYSTEMD_EXECSTARTPRE "ExecStartPre=/usr/bin/install -m 755 -o mysql -g root -d /var/run/mysqld") SET(SYSTEMD_EXECSTARTPRE "ExecStartPre=/usr/bin/install -m 755 -o mysql -g root -d /var/run/mysqld")
SET(SYSTEMD_EXECSTARTPOST "ExecStartPost=/etc/mysql/debian-start") SET(SYSTEMD_EXECSTARTPOST "ExecStartPost=/etc/mysql/debian-start")
ENDIF() ENDIF()
IF(LIBURING AND HAVE_LIBURING_H AND NOT WITH_URING STREQUAL "no")
SET(SYSTEMD_LIMIT "# For liburing and io_uring_setup()
LimitMEMLOCK=524288")
ENDIF()
MESSAGE_ONCE(systemd "Systemd features enabled") MESSAGE_ONCE(systemd "Systemd features enabled")
ELSE() ELSE()
UNSET(LIBSYSTEMD) UNSET(LIBSYSTEMD)
......
# CHECK_URING: detect liburing on Linux and enable it according to the
# WITH_URING cache option.
#   WITH_URING=auto  use liburing when both the library and liburing.h exist
#   WITH_URING=yes   as above, but fail the configuration when it is missing
#   WITH_URING=no    do not look for liburing at all
# On success this defines HAVE_URING for the compiler and links with uring.
MACRO(CHECK_URING)
  IF(CMAKE_SYSTEM_NAME MATCHES "Linux")
    INCLUDE(CheckIncludeFiles)
    SET(WITH_URING "auto" CACHE STRING "Enable liburing usage")
    # Reject anything other than the three supported values up front.
    IF(NOT (WITH_URING STREQUAL "yes" OR WITH_URING STREQUAL "auto"
            OR WITH_URING STREQUAL "no"))
      MESSAGE(FATAL_ERROR "Invalid value for WITH_URING. Must be 'yes', 'no', or 'auto'.")
    ELSEIF(NOT WITH_URING STREQUAL "no")
      # "yes" or "auto": probe for the library and its header.
      FIND_LIBRARY(LIBURING uring)
      CHECK_INCLUDE_FILES(liburing.h HAVE_LIBURING_H)
      IF(LIBURING AND HAVE_LIBURING_H)
        ADD_DEFINITIONS(-DHAVE_URING)
        LINK_LIBRARIES(uring)
      ELSEIF(WITH_URING STREQUAL "yes")
        # Only an explicit request turns a missing liburing into an error.
        MESSAGE(FATAL_ERROR "Requested WITH_URING=yes but liburing was not found")
      ENDIF()
    ENDIF()
  ENDIF()
ENDMACRO()
...@@ -2141,6 +2141,11 @@ static bool innodb_init_param() ...@@ -2141,6 +2141,11 @@ static bool innodb_init_param()
if (srv_use_native_aio) { if (srv_use_native_aio) {
msg("InnoDB: Using Linux native AIO"); msg("InnoDB: Using Linux native AIO");
} }
#elif defined(HAVE_URING)
if (srv_use_native_aio) {
msg("InnoDB: Using liburing");
}
#else #else
/* Currently native AIO is supported only on windows and linux /* Currently native AIO is supported only on windows and linux
and that also when the support is compiled in. In all other and that also when the support is compiled in. In all other
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
# -*- cperl -*- # -*- cperl -*-
# Copyright (c) 2004, 2014, Oracle and/or its affiliates. # Copyright (c) 2004, 2014, Oracle and/or its affiliates.
# Copyright (c) 2009, 2020, MariaDB Corporation # Copyright (c) 2009, 2021, MariaDB Corporation
# #
# This program is free software; you can redistribute it and/or modify # This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by # it under the terms of the GNU General Public License as published by
...@@ -4376,6 +4376,8 @@ sub extract_warning_lines ($$) { ...@@ -4376,6 +4376,8 @@ sub extract_warning_lines ($$) {
qr|Linux Native AIO|, # warning that aio does not work on /dev/shm qr|Linux Native AIO|, # warning that aio does not work on /dev/shm
qr|InnoDB: io_setup\(\) attempt|, qr|InnoDB: io_setup\(\) attempt|,
qr|InnoDB: io_setup\(\) failed with EAGAIN|, qr|InnoDB: io_setup\(\) failed with EAGAIN|,
qr|io_uring_queue_init\(\) failed with|,
qr|InnoDB: liburing disabled|,
qr|setrlimit could not change the size of core files to 'infinity';|, qr|setrlimit could not change the size of core files to 'infinity';|,
qr|feedback plugin: failed to retrieve the MAC address|, qr|feedback plugin: failed to retrieve the MAC address|,
qr|Plugin 'FEEDBACK' init function returned error|, qr|Plugin 'FEEDBACK' init function returned error|,
......
...@@ -3543,8 +3543,7 @@ static int innodb_init_params() ...@@ -3543,8 +3543,7 @@ static int innodb_init_params()
srv_use_doublewrite_buf = FALSE; srv_use_doublewrite_buf = FALSE;
} }
#ifdef LINUX_NATIVE_AIO #if !defined LINUX_NATIVE_AIO && !defined HAVE_URING && !defined _WIN32
#elif !defined _WIN32
/* Currently native AIO is supported only on windows and linux /* Currently native AIO is supported only on windows and linux
and that also when the support is compiled in. In all other and that also when the support is compiled in. In all other
cases, we ignore the setting of innodb_use_native_aio. */ cases, we ignore the setting of innodb_use_native_aio. */
......
...@@ -56,6 +56,7 @@ IF(UNIX) ...@@ -56,6 +56,7 @@ IF(UNIX)
ADD_DEFINITIONS("-DUNIV_LINUX -D_GNU_SOURCE=1") ADD_DEFINITIONS("-DUNIV_LINUX -D_GNU_SOURCE=1")
IF (NOT LIBURING)
CHECK_INCLUDE_FILES (libaio.h HAVE_LIBAIO_H) CHECK_INCLUDE_FILES (libaio.h HAVE_LIBAIO_H)
CHECK_LIBRARY_EXISTS(aio io_queue_init "" HAVE_LIBAIO) CHECK_LIBRARY_EXISTS(aio io_queue_init "" HAVE_LIBAIO)
...@@ -63,6 +64,7 @@ IF(UNIX) ...@@ -63,6 +64,7 @@ IF(UNIX)
ADD_DEFINITIONS(-DLINUX_NATIVE_AIO=1) ADD_DEFINITIONS(-DLINUX_NATIVE_AIO=1)
LINK_LIBRARIES(aio) LINK_LIBRARIES(aio)
ENDIF() ENDIF()
ENDIF()
IF(HAVE_LIBNUMA) IF(HAVE_LIBNUMA)
LINK_LIBRARIES(numa) LINK_LIBRARIES(numa)
ENDIF() ENDIF()
......
...@@ -4024,6 +4024,17 @@ int os_aio_init() ...@@ -4024,6 +4024,17 @@ int os_aio_init()
} }
#endif #endif
#ifdef HAVE_URING
if (ret)
{
ut_ad(srv_use_native_aio);
ib::warn()
<< "liburing disabled: falling back to innodb_use_native_aio=OFF";
srv_use_native_aio= false;
ret= srv_thread_pool->configure_aio(false, max_events);
}
#endif
if (!ret) if (!ret)
{ {
read_slots= new io_slots(max_read_events, srv_n_read_io_threads); read_slots= new io_slots(max_read_events, srv_n_read_io_threads);
......
...@@ -1193,6 +1193,11 @@ dberr_t srv_start(bool create_new_db) ...@@ -1193,6 +1193,11 @@ dberr_t srv_start(bool create_new_db)
ib::info() << "Using Linux native AIO"; ib::info() << "Using Linux native AIO";
} }
#endif #endif
#ifdef HAVE_URING
if (srv_use_native_aio) {
ib::info() << "Using liburing";
}
#endif
fil_system.create(srv_file_per_table ? 50000 : 5000); fil_system.create(srv_file_per_table ? 50000 : 5000);
......
# Copyright (c) 2006, 2016, Oracle and/or its affiliates. # Copyright (c) 2006, 2016, Oracle and/or its affiliates.
# Copyright (c) 2012, 2017, MariaDB # Copyright (c) 2012, 2021, MariaDB
# #
# This program is free software; you can redistribute it and/or modify # This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by # it under the terms of the GNU General Public License as published by
......
...@@ -144,7 +144,7 @@ TimeoutStopSec=900 ...@@ -144,7 +144,7 @@ TimeoutStopSec=900
# Number of files limit. previously [mysqld_safe] open-files-limit # Number of files limit. previously [mysqld_safe] open-files-limit
LimitNOFILE=16384 LimitNOFILE=16384
@SYSTEMD_LIMIT@
# Maximium core size. previously [mysqld_safe] core-file-size # Maximium core size. previously [mysqld_safe] core-file-size
# LimitCore= # LimitCore=
......
...@@ -269,7 +269,7 @@ Group=mysql ...@@ -269,7 +269,7 @@ Group=mysql
# Number of files limit. previously [mysqld_safe] open-files-limit # Number of files limit. previously [mysqld_safe] open-files-limit
LimitNOFILE=16384 LimitNOFILE=16384
@SYSTEMD_LIMIT@
# Maximium core size. previously [mysqld_safe] core-file-size # Maximium core size. previously [mysqld_safe] core-file-size
# LimitCore= # LimitCore=
......
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
IF(WIN32) IF(WIN32)
SET(EXTRA_SOURCES tpool_win.cc aio_win.cc) SET(EXTRA_SOURCES tpool_win.cc aio_win.cc)
ELSE()
SET(EXTRA_SOURCES aio_linux.cc)
ENDIF() ENDIF()
IF(CMAKE_SYSTEM_NAME STREQUAL "Linux") IF(CMAKE_SYSTEM_NAME STREQUAL "Linux" AND LIBURING)
SET(EXTRA_SOURCES aio_liburing.cc)
ENDIF()
IF(CMAKE_SYSTEM_NAME STREQUAL "Linux" AND NOT LIBURING)
CHECK_INCLUDE_FILES (libaio.h HAVE_LIBAIO_H) CHECK_INCLUDE_FILES (libaio.h HAVE_LIBAIO_H)
CHECK_LIBRARY_EXISTS(aio io_queue_init "" HAVE_LIBAIO) CHECK_LIBRARY_EXISTS(aio io_queue_init "" HAVE_LIBAIO)
IF(HAVE_LIBAIO_H AND HAVE_LIBAIO) IF(HAVE_LIBAIO_H AND HAVE_LIBAIO)
ADD_DEFINITIONS(-DLINUX_NATIVE_AIO=1) ADD_DEFINITIONS(-DLINUX_NATIVE_AIO=1)
LINK_LIBRARIES(aio) LINK_LIBRARIES(aio)
SET(EXTRA_SOURCES aio_linux.cc)
ENDIF() ENDIF()
ENDIF() ENDIF()
......
/* Copyright (C) 2021, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */
#include "tpool_structs.h"
#include "tpool.h"
#include "mysql/service_my_print_error.h"
#include "mysqld_error.h"
#include <liburing.h>
#include <algorithm>
#include <cassert>
#include <cerrno>
#include <mutex>
#include <stdexcept>
#include <thread>
#include <vector>
namespace
{
/**
  Asynchronous I/O backend for tpool built on liburing.

  Requests are queued with submit_io(). A dedicated thread (thread_routine)
  waits for completions, stores the result in the tpool::aiocb and hands the
  request's callback to the thread pool.

  liburing calls on one ring are not thread-safe, so every
  "get SQE ... submit" sequence is serialized by mutex_.
*/
class aio_uring final : public tpool::aio
{
public:
  /**
    Set up the ring and start the completion thread.

    @param tpool    thread pool that will execute completion callbacks
    @param max_aio  number of entries requested from io_uring_queue_init()
    @throws std::runtime_error if the ring cannot be initialized; a warning
            has been written to the error log in that case
  */
  aio_uring(tpool::thread_pool *tpool, int max_aio) : tpool_(tpool)
  {
    // io_uring_queue_init() reports failure as a negative errno value in its
    // return value; errno itself is not guaranteed to be set.
    if (const int ret= io_uring_queue_init(max_aio, &uring_, 0))
    {
      switch (ret) {
      case -ENOMEM:
        my_printf_error(ER_UNKNOWN_ERROR,
                        "io_uring_queue_init() failed with ENOMEM:"
                        " try larger ulimit -l\n",
                        ME_ERROR_LOG | ME_WARNING);
        break;
      case -ENOSYS:
        my_printf_error(ER_UNKNOWN_ERROR,
                        "io_uring_queue_init() failed with ENOSYS:"
                        " try upgrading the kernel\n",
                        ME_ERROR_LOG | ME_WARNING);
        break;
      default:
        my_printf_error(ER_UNKNOWN_ERROR,
                        "io_uring_queue_init() failed with errno %d\n",
                        ME_ERROR_LOG | ME_WARNING, -ret);
      }
      throw std::runtime_error("aio_uring()");
    }

    try
    {
      thread_= std::thread(thread_routine, this);
    }
    catch (...)
    {
      // The destructor will not run for a partially constructed object,
      // so release the ring here before propagating the failure.
      io_uring_queue_exit(&uring_);
      throw;
    }
  }

  /**
    Stop the completion thread and release the ring.

    A no-op request whose user data is nullptr is submitted; thread_routine()
    treats a completion without user data as the request to terminate.
  */
  ~aio_uring() noexcept
  {
    {
      std::lock_guard<std::mutex> _(mutex_);
      io_uring_sqe *sqe= io_uring_get_sqe(&uring_);
      io_uring_prep_nop(sqe);
      io_uring_sqe_set_data(sqe, nullptr);
      auto ret= io_uring_submit(&uring_);
      if (ret != 1)
      {
        // Without the wake-up request the join() below could wait forever.
        my_printf_error(ER_UNKNOWN_ERROR,
                        "io_uring_submit() returned %d during shutdown:"
                        " this may cause a hang\n",
                        ME_ERROR_LOG | ME_FATAL, ret);
        abort();
      }
    }
    thread_.join();
    io_uring_queue_exit(&uring_);
  }

  /**
    Queue one read or write request.

    @param cb  request; its iovec base part is filled in from m_buffer/m_len
               and the pointer is attached to the SQE as user data
    @return 0 if exactly one request was submitted, -1 otherwise
  */
  int submit_io(tpool::aiocb *cb) final
  {
    cb->iov_base= cb->m_buffer;
    cb->iov_len= cb->m_len;

    // Everything from io_uring_get_sqe() up to io_uring_submit() must be
    // one atomic step, because liburing calls are not thread-safe.
    std::lock_guard<std::mutex> _(mutex_);

    io_uring_sqe *sqe= io_uring_get_sqe(&uring_);
    if (!sqe)
      return -1; // no free submission queue entry

    if (cb->m_opcode == tpool::aio_opcode::AIO_PREAD)
      io_uring_prep_readv(sqe, cb->m_fh, static_cast<struct iovec *>(cb), 1,
                          cb->m_offset);
    else
      io_uring_prep_writev(sqe, cb->m_fh, static_cast<struct iovec *>(cb), 1,
                           cb->m_offset);
    io_uring_sqe_set_data(sqe, cb);

    return io_uring_submit(&uring_) == 1 ? 0 : -1;
  }

  /**
    Add a file handle to the sorted list of known files and pass the list
    to io_uring_register_files_update().

    @param fd  file handle that must not have been bound already
    @return the result of io_uring_register_files_update()
  */
  int bind(native_file_handle &fd) final
  {
    std::lock_guard<std::mutex> _(files_mutex_);
    auto it= std::lower_bound(files_.begin(), files_.end(), fd);
    assert(it == files_.end() || *it != fd);
    files_.insert(it, fd);
    return io_uring_register_files_update(&uring_, 0, files_.data(),
                                          files_.size());
  }

  /**
    Remove a file handle from the sorted list of known files and pass the
    list to io_uring_register_files_update().

    @param fd  file handle that was previously passed to bind()
    @return the result of io_uring_register_files_update()
  */
  int unbind(const native_file_handle &fd) final
  {
    std::lock_guard<std::mutex> _(files_mutex_);
    auto it= std::lower_bound(files_.begin(), files_.end(), fd);
    // Check the iterator before dereferencing it.
    assert(it != files_.end() && *it == fd);
    files_.erase(it);
    return io_uring_register_files_update(&uring_, 0, files_.data(),
                                          files_.size());
  }

private:
  /**
    Body of the completion thread.

    Waits for completion queue entries, copies the outcome into the
    corresponding tpool::aiocb and submits its callback to the thread pool.
    Terminates when a completion without user data arrives (see destructor).

    @param aio  the owning aio_uring object
  */
  static void thread_routine(aio_uring *aio)
  {
    for (;;)
    {
      io_uring_cqe *cqe;
      if (int ret= io_uring_wait_cqe(&aio->uring_, &cqe))
      {
        // An interrupted wait is not an error and does not mean shutdown:
        // retry, so that later completions are still processed. Shutdown is
        // requested by the nullptr-tagged no-op handled below.
        if (ret == -EINTR)
          continue;
        my_printf_error(ER_UNKNOWN_ERROR,
                        "io_uring_wait_cqe() returned %d\n",
                        ME_ERROR_LOG | ME_FATAL, ret);
        abort();
      }

      auto *iocb= static_cast<tpool::aiocb*>(io_uring_cqe_get_data(cqe));
      if (!iocb)
        break; // the destructor asked us to stop

      // cqe->res is the byte count on success or a negative errno value.
      int res= cqe->res;
      if (res < 0)
      {
        iocb->m_err= -res;
        iocb->m_ret_len= 0;
      }
      else
      {
        iocb->m_err= 0;
        iocb->m_ret_len= res;
      }

      // Release the CQE before handing over the request; cqe must not be
      // accessed after this point.
      io_uring_cqe_seen(&aio->uring_, cqe);

      iocb->m_internal_task.m_func= iocb->m_callback;
      iocb->m_internal_task.m_arg= iocb;
      iocb->m_internal_task.m_group= iocb->m_group;
      aio->tpool_->submit_task(&iocb->m_internal_task);
    }
  }

  io_uring uring_;
  /** serializes io_uring_get_sqe() ... io_uring_submit() */
  std::mutex mutex_;
  tpool::thread_pool *tpool_;
  /** runs thread_routine() */
  std::thread thread_;

  /** file handles passed to bind(), kept sorted */
  std::vector<native_file_handle> files_;
  /** protects files_ */
  std::mutex files_mutex_;
};
} // namespace
namespace tpool
{
/**
  Create the io_uring based asynchronous I/O backend.

  @param pool     thread pool passed to the aio_uring constructor
  @param max_aio  queue size passed to the aio_uring constructor
  @return the new object, or nullptr if the aio_uring constructor threw
          std::runtime_error
*/
aio *create_linux_aio(thread_pool *pool, int max_aio)
{
  aio *backend= nullptr;
  try
  {
    backend= new aio_uring(pool, max_aio);
  }
  catch (const std::runtime_error &)
  {
    // Construction failed; signal this to the caller with nullptr.
  }
  return backend;
}
} // namespace tpool
...@@ -16,7 +16,6 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111 - 1301 USA*/ ...@@ -16,7 +16,6 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111 - 1301 USA*/
#include "tpool_structs.h" #include "tpool_structs.h"
#include "tpool.h" #include "tpool.h"
#ifdef LINUX_NATIVE_AIO
# include <thread> # include <thread>
# include <atomic> # include <atomic>
# include <libaio.h> # include <libaio.h>
...@@ -69,7 +68,6 @@ static int my_getevents(io_context_t ctx, long min_nr, long nr, io_event *ev) ...@@ -69,7 +68,6 @@ static int my_getevents(io_context_t ctx, long min_nr, long nr, io_event *ev)
} }
return ret; return ret;
} }
#endif
/* /*
...@@ -84,7 +82,6 @@ static int my_getevents(io_context_t ctx, long min_nr, long nr, io_event *ev) ...@@ -84,7 +82,6 @@ static int my_getevents(io_context_t ctx, long min_nr, long nr, io_event *ev)
*/ */
namespace tpool namespace tpool
{ {
#ifdef LINUX_NATIVE_AIO
class aio_linux final : public aio class aio_linux final : public aio
{ {
...@@ -187,7 +184,4 @@ aio *create_linux_aio(thread_pool *pool, int max_io) ...@@ -187,7 +184,4 @@ aio *create_linux_aio(thread_pool *pool, int max_io)
} }
return new aio_linux(ctx, pool); return new aio_linux(ctx, pool);
} }
#else
aio *create_linux_aio(thread_pool*, int) { return nullptr; }
#endif
} }
...@@ -22,6 +22,9 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111 - 1301 USA*/ ...@@ -22,6 +22,9 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111 - 1301 USA*/
#ifdef LINUX_NATIVE_AIO #ifdef LINUX_NATIVE_AIO
#include <libaio.h> #include <libaio.h>
#endif #endif
#ifdef HAVE_URING
#include <sys/uio.h>
#endif
#ifdef _WIN32 #ifdef _WIN32
#ifndef NOMINMAX #ifndef NOMINMAX
#define NOMINMAX #define NOMINMAX
...@@ -123,6 +126,8 @@ struct aiocb ...@@ -123,6 +126,8 @@ struct aiocb
:OVERLAPPED :OVERLAPPED
#elif defined LINUX_NATIVE_AIO #elif defined LINUX_NATIVE_AIO
:iocb :iocb
#elif defined HAVE_URING
:iovec
#endif #endif
{ {
native_file_handle m_fh; native_file_handle m_fh;
......
...@@ -38,7 +38,11 @@ namespace tpool ...@@ -38,7 +38,11 @@ namespace tpool
{ {
#ifdef __linux__ #ifdef __linux__
#if defined(HAVE_URING) || defined(LINUX_NATIVE_AIO)
extern aio* create_linux_aio(thread_pool* tp, int max_io); extern aio* create_linux_aio(thread_pool* tp, int max_io);
#else
aio *create_linux_aio(thread_pool *, int) { return nullptr; };
#endif
#endif #endif
#ifdef _WIN32 #ifdef _WIN32
extern aio* create_win_aio(thread_pool* tp, int max_io); extern aio* create_win_aio(thread_pool* tp, int max_io);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment