Commit 12672542 authored by Marko Mäkelä's avatar Marko Mäkelä

MDEV-25121: innodb_flush_method=O_DIRECT fails on compressed tables

Tests with 4096-byte sector size confirm that it is
safe to use O_DIRECT with page_compressed tables.
That had been disabled on Linux, in an attempt to fix MDEV-21584
which had been filed for the O_DIRECT problems earlier.

The fil_node_t::block_size was being set mostly correctly until
commit 10dd290b (MDEV-17380)
introduced a regression in MariaDB Server 10.4.4.

fil_node_open_file(): Only avoid setting O_DIRECT on
ROW_FORMAT=COMPRESSED tables that use KEY_BLOCK_SIZE=1 or 2
(1024 or 2048 bytes).

fil_ibd_create(): Avoid setting O_DIRECT on ROW_FORMAT=COMPRESSED tables
that use KEY_BLOCK_SIZE=1 or 2 (1024 or 2048 bytes).

fil_node_t::find_metadata(): Require fstat() to be always invoked
outside Microsoft Windows, so that fil_node_t::block_size can be set.

fil_node_t::read_page0(): Rely on find_metadata() to assign block_size.

Thanks to Vladislav Vaintroub for testing this on Microsoft Windows
using an old-fashioned rotational hard disk with 4KiB sector size.

Reviewed by: Vladislav Vaintroub

This is a port of commit 00f620b2
and commit 6505662c from 10.2.
parent 39c015b7
...@@ -488,12 +488,16 @@ static bool fil_node_open_file(fil_node_t* node) ...@@ -488,12 +488,16 @@ static bool fil_node_open_file(fil_node_t* node)
const bool first_time_open = node->size == 0; const bool first_time_open = node->size == 0;
bool o_direct_possible = !FSP_FLAGS_HAS_PAGE_COMPRESSION(space->flags); ulint type;
if (const ulint ssize = FSP_FLAGS_GET_ZIP_SSIZE(space->flags)) { static_assert(((UNIV_ZIP_SIZE_MIN >> 1) << 3) == 4096,
compile_time_assert(((UNIV_ZIP_SIZE_MIN >> 1) << 3) == 4096); "compatibility");
if (ssize < 3) { switch (FSP_FLAGS_GET_ZIP_SSIZE(space->flags)) {
o_direct_possible = false; case 1:
} case 2:
type = OS_DATA_FILE_NO_O_DIRECT;
break;
default:
type = OS_DATA_FILE;
} }
if (first_time_open if (first_time_open
...@@ -514,9 +518,7 @@ static bool fil_node_open_file(fil_node_t* node) ...@@ -514,9 +518,7 @@ static bool fil_node_open_file(fil_node_t* node)
? OS_FILE_OPEN_RAW | OS_FILE_ON_ERROR_NO_EXIT ? OS_FILE_OPEN_RAW | OS_FILE_ON_ERROR_NO_EXIT
: OS_FILE_OPEN | OS_FILE_ON_ERROR_NO_EXIT, : OS_FILE_OPEN | OS_FILE_ON_ERROR_NO_EXIT,
OS_FILE_AIO, OS_FILE_AIO,
o_direct_possible type,
? OS_DATA_FILE
: OS_DATA_FILE_NO_O_DIRECT,
read_only_mode, read_only_mode,
&success); &success);
...@@ -556,9 +558,7 @@ static bool fil_node_open_file(fil_node_t* node) ...@@ -556,9 +558,7 @@ static bool fil_node_open_file(fil_node_t* node)
? OS_FILE_OPEN_RAW | OS_FILE_ON_ERROR_NO_EXIT ? OS_FILE_OPEN_RAW | OS_FILE_ON_ERROR_NO_EXIT
: OS_FILE_OPEN | OS_FILE_ON_ERROR_NO_EXIT, : OS_FILE_OPEN | OS_FILE_ON_ERROR_NO_EXIT,
OS_FILE_AIO, OS_FILE_AIO,
o_direct_possible type,
? OS_DATA_FILE
: OS_DATA_FILE_NO_O_DIRECT,
read_only_mode, read_only_mode,
&success); &success);
} }
...@@ -2904,13 +2904,22 @@ fil_ibd_create( ...@@ -2904,13 +2904,22 @@ fil_ibd_create(
return NULL; return NULL;
} }
ulint type;
static_assert(((UNIV_ZIP_SIZE_MIN >> 1) << 3) == 4096,
"compatibility");
switch (FSP_FLAGS_GET_ZIP_SSIZE(flags)) {
case 1:
case 2:
type = OS_DATA_FILE_NO_O_DIRECT;
break;
default:
type = OS_DATA_FILE;
}
file = os_file_create( file = os_file_create(
innodb_data_file_key, path, innodb_data_file_key, path,
OS_FILE_CREATE | OS_FILE_ON_ERROR_NO_EXIT, OS_FILE_CREATE | OS_FILE_ON_ERROR_NO_EXIT,
OS_FILE_NORMAL, OS_FILE_NORMAL, type, srv_read_only_mode, &success);
OS_DATA_FILE,
srv_read_only_mode,
&success);
if (!success) { if (!success) {
/* The following call will print an error message */ /* The following call will print an error message */
......
...@@ -637,7 +637,7 @@ struct fil_node_t { ...@@ -637,7 +637,7 @@ struct fil_node_t {
/** Determine some file metadata when creating or reading the file. /** Determine some file metadata when creating or reading the file.
@param file the file that is being created, or OS_FILE_CLOSED */ @param file the file that is being created, or OS_FILE_CLOSED */
void find_metadata(os_file_t file = OS_FILE_CLOSED void find_metadata(os_file_t file = OS_FILE_CLOSED
#ifdef UNIV_LINUX #ifndef _WIN32
, struct stat* statbuf = NULL , struct stat* statbuf = NULL
#endif #endif
); );
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
Copyright (c) 1995, 2019, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 1995, 2019, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2009, Percona Inc. Copyright (c) 2009, Percona Inc.
Copyright (c) 2013, 2020, MariaDB Corporation. Copyright (c) 2013, 2021, MariaDB Corporation.
Portions of this file contain modifications contributed and copyrighted Portions of this file contain modifications contributed and copyrighted
by Percona Inc.. Those modifications are by Percona Inc.. Those modifications are
...@@ -4137,7 +4137,9 @@ os_file_create_func( ...@@ -4137,7 +4137,9 @@ os_file_create_func(
case SRV_ALL_O_DIRECT_FSYNC: case SRV_ALL_O_DIRECT_FSYNC:
/*Traditional Windows behavior, no buffering for any files.*/ /*Traditional Windows behavior, no buffering for any files.*/
attributes |= FILE_FLAG_NO_BUFFERING; if (type != OS_DATA_FILE_NO_O_DIRECT) {
attributes |= FILE_FLAG_NO_BUFFERING;
}
break; break;
case SRV_FSYNC: case SRV_FSYNC:
...@@ -7707,7 +7709,7 @@ static bool is_file_on_ssd(char *file_path) ...@@ -7707,7 +7709,7 @@ static bool is_file_on_ssd(char *file_path)
/** Determine some file metadata when creating or reading the file. /** Determine some file metadata when creating or reading the file.
@param file the file that is being created, or OS_FILE_CLOSED */ @param file the file that is being created, or OS_FILE_CLOSED */
void fil_node_t::find_metadata(os_file_t file void fil_node_t::find_metadata(os_file_t file
#ifdef UNIV_LINUX #ifndef _WIN32
, struct stat* statbuf , struct stat* statbuf
#endif #endif
) )
...@@ -7747,18 +7749,18 @@ void fil_node_t::find_metadata(os_file_t file ...@@ -7747,18 +7749,18 @@ void fil_node_t::find_metadata(os_file_t file
block_size = 512; block_size = 512;
} }
#else #else
on_ssd = space->atomic_write_supported; struct stat sbuf;
# ifdef UNIV_LINUX if (!statbuf && !fstat(file, &sbuf)) {
if (!on_ssd) { statbuf = &sbuf;
struct stat sbuf;
if (!statbuf && !fstat(file, &sbuf)) {
statbuf = &sbuf;
}
if (statbuf && fil_system.is_ssd(statbuf->st_dev)) {
on_ssd = true;
}
} }
if (statbuf) {
block_size = statbuf->st_blksize;
}
on_ssd = space->atomic_write_supported
# ifdef UNIV_LINUX
|| (statbuf && fil_system.is_ssd(statbuf->st_dev))
# endif # endif
;
#endif #endif
if (!space->atomic_write_supported) { if (!space->atomic_write_supported) {
space->atomic_write_supported = atomic_write space->atomic_write_supported = atomic_write
...@@ -7794,7 +7796,6 @@ bool fil_node_t::read_page0(bool first) ...@@ -7794,7 +7796,6 @@ bool fil_node_t::read_page0(bool first)
if (fstat(handle, &statbuf)) { if (fstat(handle, &statbuf)) {
return false; return false;
} }
block_size = statbuf.st_blksize;
os_offset_t size_bytes = statbuf.st_size; os_offset_t size_bytes = statbuf.st_size;
#else #else
os_offset_t size_bytes = os_file_get_size(handle); os_offset_t size_bytes = os_file_get_size(handle);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment