Commit 21adad00 authored by Jan Lindström's avatar Jan Lindström

MDEV-8901: InnoDB: Punch hole is incorrectly done also to log files causing...

MDEV-8901: InnoDB: Punch hole is incorrectly done also to log files causing assertion and database corruption

Analysis: The problem is that punch hole does not know the actual page size
of the page, nor whether the page belongs to a data file or to a log file.

Fix: Pass down the file type and page size to the OS layer to be used
when trim is called. Also fix an unsafe null pointer access to the
actual write_size.
parent 90f2c822
...@@ -219,6 +219,7 @@ ...@@ -219,6 +219,7 @@
#cmakedefine HAVE_POSIX_FALLOCATE 1 #cmakedefine HAVE_POSIX_FALLOCATE 1
#cmakedefine HAVE_LINUX_FALLOC_H 1 #cmakedefine HAVE_LINUX_FALLOC_H 1
#cmakedefine HAVE_FALLOCATE 1 #cmakedefine HAVE_FALLOCATE 1
#cmakedefine HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE 1
#cmakedefine HAVE_PREAD 1 #cmakedefine HAVE_PREAD 1
#cmakedefine HAVE_PAUSE_INSTRUCTION 1 #cmakedefine HAVE_PAUSE_INSTRUCTION 1
#cmakedefine HAVE_FAKE_PAUSE_INSTRUCTION 1 #cmakedefine HAVE_FAKE_PAUSE_INSTRUCTION 1
......
...@@ -1058,3 +1058,22 @@ CHECK_STRUCT_HAS_MEMBER("struct dirent" d_ino "dirent.h" STRUCT_DIRENT_HAS_D_IN ...@@ -1058,3 +1058,22 @@ CHECK_STRUCT_HAS_MEMBER("struct dirent" d_ino "dirent.h" STRUCT_DIRENT_HAS_D_IN
CHECK_STRUCT_HAS_MEMBER("struct dirent" d_namlen "dirent.h" STRUCT_DIRENT_HAS_D_NAMLEN) CHECK_STRUCT_HAS_MEMBER("struct dirent" d_namlen "dirent.h" STRUCT_DIRENT_HAS_D_NAMLEN)
SET(SPRINTF_RETURNS_INT 1) SET(SPRINTF_RETURNS_INT 1)
CHECK_INCLUDE_FILE(ucontext.h HAVE_UCONTEXT_H) CHECK_INCLUDE_FILE(ucontext.h HAVE_UCONTEXT_H)
# Probe for fallocate() hole-punching support.
# The embedded C program includes <fcntl.h> and <linux/falloc.h> and calls
# fallocate() with FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE. It ignores the
# call's return value and always returns 0, so the check succeeds whenever the
# program builds and runs, i.e. whenever the flags and the function exist.
# The outcome is stored in HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE.
# The probe is skipped entirely when building with MSVC.
# NOTE(review): CHECK_C_SOURCE_RUNS executes the test binary on the build
# host; confirm this behaves as intended for cross-compiled builds.
IF(NOT MSVC)
CHECK_C_SOURCE_RUNS(
"
#define _GNU_SOURCE
#include <fcntl.h>
#include <linux/falloc.h>
int main()
{
/* Ignore the return value for now. Check if the flags exist.
The return value is checked at runtime. */
fallocate(0, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, 0);
return(0);
}"
HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE
)
ENDIF()
...@@ -5223,9 +5223,9 @@ fil_extend_space_to_desired_size( ...@@ -5223,9 +5223,9 @@ fil_extend_space_to_desired_size(
success = os_file_write(node->name, node->handle, buf, success = os_file_write(node->name, node->handle, buf,
offset, page_size * n_pages); offset, page_size * n_pages);
#else #else
success = os_aio(OS_FILE_WRITE, OS_AIO_SYNC, success = os_aio(OS_FILE_WRITE, 0, OS_AIO_SYNC,
node->name, node->handle, buf, node->name, node->handle, buf,
offset, page_size * n_pages, offset, page_size * n_pages, page_size,
node, NULL, 0); node, NULL, 0);
#endif /* UNIV_HOTBACKUP */ #endif /* UNIV_HOTBACKUP */
...@@ -5872,12 +5872,14 @@ fil_io( ...@@ -5872,12 +5872,14 @@ fil_io(
/* Queue the aio request */ /* Queue the aio request */
ret = os_aio( ret = os_aio(
type, type,
is_log,
mode | wake_later, mode | wake_later,
node->name, node->name,
node->handle, node->handle,
buf, buf,
offset, offset,
len, len,
zip_size ? zip_size : UNIV_PAGE_SIZE,
node, node,
message, message,
write_size); write_size);
......
...@@ -311,10 +311,10 @@ The wrapper functions have the prefix of "innodb_". */ ...@@ -311,10 +311,10 @@ The wrapper functions have the prefix of "innodb_". */
# define os_file_close(file) \ # define os_file_close(file) \
pfs_os_file_close_func(file, __FILE__, __LINE__) pfs_os_file_close_func(file, __FILE__, __LINE__)
# define os_aio(type, mode, name, file, buf, offset, \ # define os_aio(type, is_log, mode, name, file, buf, offset, \
n, message1, message2, write_size) \ n, page_size, message1, message2, write_size) \
pfs_os_aio_func(type, mode, name, file, buf, offset, \ pfs_os_aio_func(type, is_log, mode, name, file, buf, offset, \
n, message1, message2, write_size, \ n, page_size, message1, message2, write_size, \
__FILE__, __LINE__) __FILE__, __LINE__)
...@@ -357,10 +357,10 @@ to original un-instrumented file I/O APIs */ ...@@ -357,10 +357,10 @@ to original un-instrumented file I/O APIs */
# define os_file_close(file) os_file_close_func(file) # define os_file_close(file) os_file_close_func(file)
# define os_aio(type, mode, name, file, buf, offset, n, message1, \ # define os_aio(type, is_log, mode, name, file, buf, offset, n, page_size, message1, \
message2, write_size) \ message2, write_size) \
os_aio_func(type, mode, name, file, buf, offset, n, \ os_aio_func(type, is_log, mode, name, file, buf, offset, n, \
message1, message2, write_size) page_size, message1, message2, write_size)
# define os_file_read(file, buf, offset, n) \ # define os_file_read(file, buf, offset, n) \
os_file_read_func(file, buf, offset, n) os_file_read_func(file, buf, offset, n)
...@@ -749,6 +749,7 @@ ibool ...@@ -749,6 +749,7 @@ ibool
pfs_os_aio_func( pfs_os_aio_func(
/*============*/ /*============*/
ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE */ ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE */
ulint is_log, /*!< in: 1 is OS_FILE_LOG or 0 */
ulint mode, /*!< in: OS_AIO_NORMAL etc. I/O mode */ ulint mode, /*!< in: OS_AIO_NORMAL etc. I/O mode */
const char* name, /*!< in: name of the file or path as a const char* name, /*!< in: name of the file or path as a
null-terminated string */ null-terminated string */
...@@ -757,6 +758,7 @@ pfs_os_aio_func( ...@@ -757,6 +758,7 @@ pfs_os_aio_func(
to write */ to write */
os_offset_t offset, /*!< in: file offset where to read or write */ os_offset_t offset, /*!< in: file offset where to read or write */
ulint n, /*!< in: number of bytes to read or write */ ulint n, /*!< in: number of bytes to read or write */
ulint page_size, /*!< in: page size in bytes */
fil_node_t* message1,/*!< in: message for the aio handler fil_node_t* message1,/*!< in: message for the aio handler
(can be used to identify a completed (can be used to identify a completed
aio operation); ignored if mode is aio operation); ignored if mode is
...@@ -1107,6 +1109,7 @@ ibool ...@@ -1107,6 +1109,7 @@ ibool
os_aio_func( os_aio_func(
/*========*/ /*========*/
ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE */ ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE */
ulint is_log, /*!< in: 1 is OS_FILE_LOG or 0 */
ulint mode, /*!< in: OS_AIO_NORMAL, ..., possibly ORed ulint mode, /*!< in: OS_AIO_NORMAL, ..., possibly ORed
to OS_AIO_SIMULATED_WAKE_LATER: the to OS_AIO_SIMULATED_WAKE_LATER: the
last flag advises this function not to wake last flag advises this function not to wake
...@@ -1127,6 +1130,7 @@ os_aio_func( ...@@ -1127,6 +1130,7 @@ os_aio_func(
to write */ to write */
os_offset_t offset, /*!< in: file offset where to read or write */ os_offset_t offset, /*!< in: file offset where to read or write */
ulint n, /*!< in: number of bytes to read or write */ ulint n, /*!< in: number of bytes to read or write */
ulint page_size, /*!< in: page size in bytes */
fil_node_t* message1,/*!< in: message for the aio handler fil_node_t* message1,/*!< in: message for the aio handler
(can be used to identify a completed (can be used to identify a completed
aio operation); ignored if mode is aio operation); ignored if mode is
......
...@@ -199,6 +199,7 @@ ibool ...@@ -199,6 +199,7 @@ ibool
pfs_os_aio_func( pfs_os_aio_func(
/*============*/ /*============*/
ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE */ ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE */
ulint is_log, /*!< in: 1 is OS_FILE_LOG or 0 */
ulint mode, /*!< in: OS_AIO_NORMAL etc. I/O mode */ ulint mode, /*!< in: OS_AIO_NORMAL etc. I/O mode */
const char* name, /*!< in: name of the file or path as a const char* name, /*!< in: name of the file or path as a
null-terminated string */ null-terminated string */
...@@ -207,6 +208,7 @@ pfs_os_aio_func( ...@@ -207,6 +208,7 @@ pfs_os_aio_func(
to write */ to write */
os_offset_t offset, /*!< in: file offset where to read or write */ os_offset_t offset, /*!< in: file offset where to read or write */
ulint n, /*!< in: number of bytes to read or write */ ulint n, /*!< in: number of bytes to read or write */
ulint page_size, /*!< in: page size in bytes */
fil_node_t* message1,/*!< in: message for the aio handler fil_node_t* message1,/*!< in: message for the aio handler
(can be used to identify a completed (can be used to identify a completed
aio operation); ignored if mode is aio operation); ignored if mode is
...@@ -234,8 +236,8 @@ pfs_os_aio_func( ...@@ -234,8 +236,8 @@ pfs_os_aio_func(
: PSI_FILE_READ, : PSI_FILE_READ,
src_file, src_line); src_file, src_line);
result = os_aio_func(type, mode, name, file, buf, offset, result = os_aio_func(type, is_log, mode, name, file, buf, offset,
n, message1, message2, write_size); n, page_size, message1, message2, write_size);
register_pfs_file_io_end(locker, n); register_pfs_file_io_end(locker, n);
......
...@@ -49,9 +49,8 @@ Created 10/21/1995 Heikki Tuuri ...@@ -49,9 +49,8 @@ Created 10/21/1995 Heikki Tuuri
#include "buf0buf.h" #include "buf0buf.h"
#include "srv0mon.h" #include "srv0mon.h"
#include "srv0srv.h" #include "srv0srv.h"
#ifdef HAVE_POSIX_FALLOCATE #ifdef HAVE_LINUX_UNISTD_H
#include "unistd.h" #include "unistd.h"
#include "fcntl.h"
#endif #endif
#ifndef UNIV_HOTBACKUP #ifndef UNIV_HOTBACKUP
# include "os0sync.h" # include "os0sync.h"
...@@ -84,14 +83,10 @@ Created 10/21/1995 Heikki Tuuri ...@@ -84,14 +83,10 @@ Created 10/21/1995 Heikki Tuuri
#include <linux/falloc.h> #include <linux/falloc.h>
#endif #endif
#if defined(HAVE_FALLOCATE) #ifdef HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE
#ifndef FALLOC_FL_KEEP_SIZE # include <fcntl.h>
#define FALLOC_FL_KEEP_SIZE 0x01 # include <linux/falloc.h>
#endif #endif /* HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE */
#ifndef FALLOC_FL_PUNCH_HOLE
#define FALLOC_FL_PUNCH_HOLE 0x02
#endif
#endif
#ifdef HAVE_LZO #ifdef HAVE_LZO
#include "lzo/lzo1x.h" #include "lzo/lzo1x.h"
...@@ -209,6 +204,9 @@ struct os_aio_slot_t{ ...@@ -209,6 +204,9 @@ struct os_aio_slot_t{
write */ write */
byte* buf; /*!< buffer used in i/o */ byte* buf; /*!< buffer used in i/o */
ulint type; /*!< OS_FILE_READ or OS_FILE_WRITE */ ulint type; /*!< OS_FILE_READ or OS_FILE_WRITE */
ulint is_log; /*!< 1 if OS_FILE_LOG or 0 */
ulint page_size; /*!< UNIV_PAGE_SIZE or zip_size */
os_offset_t offset; /*!< file offset in bytes */ os_offset_t offset; /*!< file offset in bytes */
os_file_t file; /*!< file where to read or write */ os_file_t file; /*!< file where to read or write */
const char* name; /*!< file name or path */ const char* name; /*!< file name or path */
...@@ -4474,6 +4472,7 @@ os_aio_slot_t* ...@@ -4474,6 +4472,7 @@ os_aio_slot_t*
os_aio_array_reserve_slot( os_aio_array_reserve_slot(
/*======================*/ /*======================*/
ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE */ ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE */
ulint is_log, /*!< in: 1 is OS_FILE_LOG or 0 */
os_aio_array_t* array, /*!< in: aio array */ os_aio_array_t* array, /*!< in: aio array */
fil_node_t* message1,/*!< in: message to be passed along with fil_node_t* message1,/*!< in: message to be passed along with
the aio operation */ the aio operation */
...@@ -4486,6 +4485,7 @@ os_aio_array_reserve_slot( ...@@ -4486,6 +4485,7 @@ os_aio_array_reserve_slot(
to write */ to write */
os_offset_t offset, /*!< in: file offset */ os_offset_t offset, /*!< in: file offset */
ulint len, /*!< in: length of the block to read or write */ ulint len, /*!< in: length of the block to read or write */
ulint page_size, /*!< in: page size in bytes */
ulint* write_size)/*!< in/out: Actual write size initialized ulint* write_size)/*!< in/out: Actual write size initialized
after fist successfull trim after fist successfull trim
operation for this page and if operation for this page and if
...@@ -4580,6 +4580,8 @@ os_aio_array_reserve_slot( ...@@ -4580,6 +4580,8 @@ os_aio_array_reserve_slot(
slot->offset = offset; slot->offset = offset;
slot->io_already_done = FALSE; slot->io_already_done = FALSE;
slot->write_size = write_size; slot->write_size = write_size;
slot->is_log = is_log;
slot->page_size = page_size;
if (message1) { if (message1) {
slot->file_block_size = fil_node_get_block_size(message1); slot->file_block_size = fil_node_get_block_size(message1);
...@@ -4836,6 +4838,7 @@ ibool ...@@ -4836,6 +4838,7 @@ ibool
os_aio_func( os_aio_func(
/*========*/ /*========*/
ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE */ ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE */
ulint is_log, /*!< in: 1 is OS_FILE_LOG or 0 */
ulint mode, /*!< in: OS_AIO_NORMAL, ..., possibly ORed ulint mode, /*!< in: OS_AIO_NORMAL, ..., possibly ORed
to OS_AIO_SIMULATED_WAKE_LATER: the to OS_AIO_SIMULATED_WAKE_LATER: the
last flag advises this function not to wake last flag advises this function not to wake
...@@ -4856,6 +4859,7 @@ os_aio_func( ...@@ -4856,6 +4859,7 @@ os_aio_func(
to write */ to write */
os_offset_t offset, /*!< in: file offset where to read or write */ os_offset_t offset, /*!< in: file offset where to read or write */
ulint n, /*!< in: number of bytes to read or write */ ulint n, /*!< in: number of bytes to read or write */
ulint page_size, /*!< in: page size in bytes */
fil_node_t* message1,/*!< in: message for the aio handler fil_node_t* message1,/*!< in: message for the aio handler
(can be used to identify a completed (can be used to identify a completed
aio operation); ignored if mode is aio operation); ignored if mode is
...@@ -4982,8 +4986,8 @@ os_aio_func( ...@@ -4982,8 +4986,8 @@ os_aio_func(
array = NULL; /* Eliminate compiler warning */ array = NULL; /* Eliminate compiler warning */
} }
slot = os_aio_array_reserve_slot(type, array, message1, message2, file, slot = os_aio_array_reserve_slot(type, is_log, array, message1, message2, file,
name, buf, offset, n, write_size); name, buf, offset, n, page_size, write_size);
if (type == OS_FILE_READ) { if (type == OS_FILE_READ) {
if (srv_use_native_aio) { if (srv_use_native_aio) {
...@@ -5251,7 +5255,10 @@ os_aio_windows_handle( ...@@ -5251,7 +5255,10 @@ os_aio_windows_handle(
ret_val = ret && len == slot->len; ret_val = ret && len == slot->len;
} }
if (slot->type == OS_FILE_WRITE && srv_use_trim && os_fallocate_failed == FALSE) { if (slot->type == OS_FILE_WRITE &&
!slot->is_log &&
srv_use_trim &&
os_fallocate_failed == FALSE) {
// Deallocate unused blocks from file system // Deallocate unused blocks from file system
os_file_trim(slot); os_file_trim(slot);
} }
...@@ -5345,7 +5352,10 @@ os_aio_linux_collect( ...@@ -5345,7 +5352,10 @@ os_aio_linux_collect(
/* We have not overstepped to next segment. */ /* We have not overstepped to next segment. */
ut_a(slot->pos < end_pos); ut_a(slot->pos < end_pos);
if (slot->type == OS_FILE_WRITE && srv_use_trim && os_fallocate_failed == FALSE) { if (slot->type == OS_FILE_WRITE &&
!slot->is_log &&
srv_use_trim &&
os_fallocate_failed == FALSE) {
// Deallocate unused blocks from file system // Deallocate unused blocks from file system
os_file_trim(slot); os_file_trim(slot);
} }
...@@ -6220,19 +6230,13 @@ os_file_trim( ...@@ -6220,19 +6230,13 @@ os_file_trim(
{ {
size_t len = slot->len; size_t len = slot->len;
size_t trim_len = UNIV_PAGE_SIZE - len; size_t trim_len = slot->page_size - len;
os_offset_t off = slot->offset + len; os_offset_t off = slot->offset + len;
size_t bsize = slot->file_block_size; size_t bsize = slot->file_block_size;
// len here should be alligned to sector size
ut_ad((trim_len % bsize) == 0);
ut_ad((len % bsize) == 0);
ut_ad(bsize != 0);
ut_ad((off % bsize) == 0);
#ifdef UNIV_TRIM_DEBUG #ifdef UNIV_TRIM_DEBUG
fprintf(stderr, "Note: TRIM: write_size %lu trim_len %lu len %lu off %lu bz %lu\n", fprintf(stderr, "Note: TRIM: write_size %lu trim_len %lu len %lu off %lu bz %lu\n",
*slot->write_size, trim_len, len, off, bsize); slot->write_size ? *slot->write_size : 0, trim_len, len, off, bsize);
#endif #endif
// Nothing to do if trim length is zero or if actual write // Nothing to do if trim length is zero or if actual write
...@@ -6247,22 +6251,19 @@ os_file_trim( ...@@ -6247,22 +6251,19 @@ os_file_trim(
*slot->write_size > 0 && *slot->write_size > 0 &&
len >= *slot->write_size)) { len >= *slot->write_size)) {
#ifdef UNIV_PAGECOMPRESS_DEBUG if (slot->write_size) {
fprintf(stderr, "Note: TRIM: write_size %lu trim_len %lu len %lu\n", if (*slot->write_size > 0 && len >= *slot->write_size) {
*slot->write_size, trim_len, len); srv_stats.page_compressed_trim_op_saved.inc();
#endif }
if (*slot->write_size > 0 && len >= *slot->write_size) { *slot->write_size = len;
srv_stats.page_compressed_trim_op_saved.inc();
} }
*slot->write_size = len;
return (TRUE); return (TRUE);
} }
#ifdef __linux__ #ifdef __linux__
#if defined(HAVE_FALLOCATE) #if defined(HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE)
int ret = fallocate(slot->file, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, off, trim_len); int ret = fallocate(slot->file, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, off, trim_len);
if (ret) { if (ret) {
...@@ -6300,7 +6301,7 @@ os_file_trim( ...@@ -6300,7 +6301,7 @@ os_file_trim(
*slot->write_size = 0; *slot->write_size = 0;
} }
#endif /* HAVE_FALLOCATE ... */ #endif /* HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE ... */
#elif defined(_WIN32) #elif defined(_WIN32)
FILE_LEVEL_TRIM flt; FILE_LEVEL_TRIM flt;
......
...@@ -5253,9 +5253,9 @@ fil_extend_space_to_desired_size( ...@@ -5253,9 +5253,9 @@ fil_extend_space_to_desired_size(
success = os_file_write(node->name, node->handle, buf, success = os_file_write(node->name, node->handle, buf,
offset, page_size * n_pages); offset, page_size * n_pages);
#else #else
success = os_aio(OS_FILE_WRITE, OS_AIO_SYNC, success = os_aio(OS_FILE_WRITE, 0, OS_AIO_SYNC,
node->name, node->handle, buf, node->name, node->handle, buf,
offset, page_size * n_pages, offset, page_size * n_pages, page_size,
node, NULL, space_id, NULL, 0); node, NULL, space_id, NULL, 0);
#endif /* UNIV_HOTBACKUP */ #endif /* UNIV_HOTBACKUP */
...@@ -5918,12 +5918,14 @@ _fil_io( ...@@ -5918,12 +5918,14 @@ _fil_io(
/* Queue the aio request */ /* Queue the aio request */
ret = os_aio( ret = os_aio(
type, type,
is_log,
mode | wake_later, mode | wake_later,
node->name, node->name,
node->handle, node->handle,
buf, buf,
offset, offset,
len, len,
zip_size ? zip_size : UNIV_PAGE_SIZE,
node, node,
message, message,
space_id, space_id,
......
...@@ -321,11 +321,11 @@ The wrapper functions have the prefix of "innodb_". */ ...@@ -321,11 +321,11 @@ The wrapper functions have the prefix of "innodb_". */
# define os_file_close(file) \ # define os_file_close(file) \
pfs_os_file_close_func(file, __FILE__, __LINE__) pfs_os_file_close_func(file, __FILE__, __LINE__)
# define os_aio(type, mode, name, file, buf, offset, \ # define os_aio(type, is_log, mode, name, file, buf, offset, \
n, message1, message2, space_id, \ n, page_size, message1, message2, space_id, \
trx, write_size) \ trx, write_size) \
pfs_os_aio_func(type, mode, name, file, buf, offset, \ pfs_os_aio_func(type, is_log, mode, name, file, buf, offset, \
n, message1, message2, space_id, trx, write_size, \ n, page_size, message1, message2, space_id, trx, write_size, \
__FILE__, __LINE__) __FILE__, __LINE__)
# define os_file_read(file, buf, offset, n) \ # define os_file_read(file, buf, offset, n) \
...@@ -372,10 +372,10 @@ to original un-instrumented file I/O APIs */ ...@@ -372,10 +372,10 @@ to original un-instrumented file I/O APIs */
# define os_file_close(file) os_file_close_func(file) # define os_file_close(file) os_file_close_func(file)
# define os_aio(type, mode, name, file, buf, offset, n, message1, \ # define os_aio(type, is_log, mode, name, file, buf, offset, n, page_size, message1, \
message2, space_id, trx, write_size) \ message2, space_id, trx, write_size) \
os_aio_func(type, mode, name, file, buf, offset, n, \ os_aio_func(type, is_log, mode, name, file, buf, offset, n, \
message1, message2, space_id, trx, write_size) page_size, message1, message2, space_id, trx, write_size)
# define os_file_read(file, buf, offset, n) \ # define os_file_read(file, buf, offset, n) \
os_file_read_func(file, buf, offset, n, NULL) os_file_read_func(file, buf, offset, n, NULL)
...@@ -772,6 +772,7 @@ ibool ...@@ -772,6 +772,7 @@ ibool
pfs_os_aio_func( pfs_os_aio_func(
/*============*/ /*============*/
ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE */ ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE */
ulint is_log, /*!< in: 1 is OS_FILE_LOG or 0 */
ulint mode, /*!< in: OS_AIO_NORMAL etc. I/O mode */ ulint mode, /*!< in: OS_AIO_NORMAL etc. I/O mode */
const char* name, /*!< in: name of the file or path as a const char* name, /*!< in: name of the file or path as a
null-terminated string */ null-terminated string */
...@@ -780,6 +781,7 @@ pfs_os_aio_func( ...@@ -780,6 +781,7 @@ pfs_os_aio_func(
to write */ to write */
os_offset_t offset, /*!< in: file offset where to read or write */ os_offset_t offset, /*!< in: file offset where to read or write */
ulint n, /*!< in: number of bytes to read or write */ ulint n, /*!< in: number of bytes to read or write */
ulint page_size,/*!< in: page size in bytes */
fil_node_t* message1,/*!< in: message for the aio handler fil_node_t* message1,/*!< in: message for the aio handler
(can be used to identify a completed (can be used to identify a completed
aio operation); ignored if mode is aio operation); ignored if mode is
...@@ -1139,6 +1141,7 @@ ibool ...@@ -1139,6 +1141,7 @@ ibool
os_aio_func( os_aio_func(
/*========*/ /*========*/
ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE */ ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE */
ulint is_log, /*!< in: 1 is OS_FILE_LOG or 0 */
ulint mode, /*!< in: OS_AIO_NORMAL, ..., possibly ORed ulint mode, /*!< in: OS_AIO_NORMAL, ..., possibly ORed
to OS_AIO_SIMULATED_WAKE_LATER: the to OS_AIO_SIMULATED_WAKE_LATER: the
last flag advises this function not to wake last flag advises this function not to wake
...@@ -1159,6 +1162,7 @@ os_aio_func( ...@@ -1159,6 +1162,7 @@ os_aio_func(
to write */ to write */
os_offset_t offset, /*!< in: file offset where to read or write */ os_offset_t offset, /*!< in: file offset where to read or write */
ulint n, /*!< in: number of bytes to read or write */ ulint n, /*!< in: number of bytes to read or write */
ulint page_size, /*!< in: page size in bytes */
fil_node_t* message1,/*!< in: message for the aio handler fil_node_t* message1,/*!< in: message for the aio handler
(can be used to identify a completed (can be used to identify a completed
aio operation); ignored if mode is aio operation); ignored if mode is
......
...@@ -202,6 +202,7 @@ ibool ...@@ -202,6 +202,7 @@ ibool
pfs_os_aio_func( pfs_os_aio_func(
/*============*/ /*============*/
ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE */ ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE */
ulint is_log, /*!< in: 1 is OS_FILE_LOG or 0 */
ulint mode, /*!< in: OS_AIO_NORMAL etc. I/O mode */ ulint mode, /*!< in: OS_AIO_NORMAL etc. I/O mode */
const char* name, /*!< in: name of the file or path as a const char* name, /*!< in: name of the file or path as a
null-terminated string */ null-terminated string */
...@@ -210,6 +211,7 @@ pfs_os_aio_func( ...@@ -210,6 +211,7 @@ pfs_os_aio_func(
to write */ to write */
os_offset_t offset, /*!< in: file offset where to read or write */ os_offset_t offset, /*!< in: file offset where to read or write */
ulint n, /*!< in: number of bytes to read or write */ ulint n, /*!< in: number of bytes to read or write */
ulint page_size, /*!< in: page size in bytes */
fil_node_t* message1,/*!< in: message for the aio handler fil_node_t* message1,/*!< in: message for the aio handler
(can be used to identify a completed (can be used to identify a completed
aio operation); ignored if mode is aio operation); ignored if mode is
...@@ -239,8 +241,8 @@ pfs_os_aio_func( ...@@ -239,8 +241,8 @@ pfs_os_aio_func(
: PSI_FILE_READ, : PSI_FILE_READ,
src_file, src_line); src_file, src_line);
result = os_aio_func(type, mode, name, file, buf, offset, result = os_aio_func(type, is_log, mode, name, file, buf, offset,
n, message1, message2, space_id, trx, n, page_size, message1, message2, space_id, trx,
write_size); write_size);
register_pfs_file_io_end(locker, n); register_pfs_file_io_end(locker, n);
......
...@@ -50,9 +50,8 @@ Created 10/21/1995 Heikki Tuuri ...@@ -50,9 +50,8 @@ Created 10/21/1995 Heikki Tuuri
#include "trx0trx.h" #include "trx0trx.h"
#include "srv0mon.h" #include "srv0mon.h"
#include "srv0srv.h" #include "srv0srv.h"
#ifdef HAVE_POSIX_FALLOCATE #ifdef HAVE_LINUX_UNISTD_H
#include "unistd.h" #include "unistd.h"
#include "fcntl.h"
#endif #endif
#ifndef UNIV_HOTBACKUP #ifndef UNIV_HOTBACKUP
# include "os0sync.h" # include "os0sync.h"
...@@ -89,14 +88,10 @@ Created 10/21/1995 Heikki Tuuri ...@@ -89,14 +88,10 @@ Created 10/21/1995 Heikki Tuuri
#include <linux/falloc.h> #include <linux/falloc.h>
#endif #endif
#if defined(HAVE_FALLOCATE) #ifdef HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE
#ifndef FALLOC_FL_KEEP_SIZE # include <fcntl.h>
#define FALLOC_FL_KEEP_SIZE 0x01 # include <linux/falloc.h>
#endif #endif /* HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE */
#ifndef FALLOC_FL_PUNCH_HOLE
#define FALLOC_FL_PUNCH_HOLE 0x02
#endif
#endif
#ifdef HAVE_LZO #ifdef HAVE_LZO
#include "lzo/lzo1x.h" #include "lzo/lzo1x.h"
...@@ -221,6 +216,9 @@ struct os_aio_slot_t{ ...@@ -221,6 +216,9 @@ struct os_aio_slot_t{
write */ write */
byte* buf; /*!< buffer used in i/o */ byte* buf; /*!< buffer used in i/o */
ulint type; /*!< OS_FILE_READ or OS_FILE_WRITE */ ulint type; /*!< OS_FILE_READ or OS_FILE_WRITE */
ulint is_log; /*!< 1 is OS_FILE_LOG or 0 */
ulint page_size; /*!< UNIV_PAGE_SIZE or zip_size */
os_offset_t offset; /*!< file offset in bytes */ os_offset_t offset; /*!< file offset in bytes */
os_file_t file; /*!< file where to read or write */ os_file_t file; /*!< file where to read or write */
const char* name; /*!< file name or path */ const char* name; /*!< file name or path */
...@@ -4573,6 +4571,7 @@ os_aio_slot_t* ...@@ -4573,6 +4571,7 @@ os_aio_slot_t*
os_aio_array_reserve_slot( os_aio_array_reserve_slot(
/*======================*/ /*======================*/
ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE */ ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE */
ulint is_log, /*!< in: 1 is OS_FILE_LOG or 0 */
os_aio_array_t* array, /*!< in: aio array */ os_aio_array_t* array, /*!< in: aio array */
fil_node_t* message1,/*!< in: message to be passed along with fil_node_t* message1,/*!< in: message to be passed along with
the aio operation */ the aio operation */
...@@ -4585,6 +4584,7 @@ os_aio_array_reserve_slot( ...@@ -4585,6 +4584,7 @@ os_aio_array_reserve_slot(
to write */ to write */
os_offset_t offset, /*!< in: file offset */ os_offset_t offset, /*!< in: file offset */
ulint len, /*!< in: length of the block to read or write */ ulint len, /*!< in: length of the block to read or write */
ulint page_size, /*!< in: page size in bytes */
ulint space_id, ulint space_id,
ulint* write_size)/*!< in/out: Actual write size initialized ulint* write_size)/*!< in/out: Actual write size initialized
after first successfull trim after first successfull trim
...@@ -4681,6 +4681,8 @@ os_aio_array_reserve_slot( ...@@ -4681,6 +4681,8 @@ os_aio_array_reserve_slot(
slot->offset = offset; slot->offset = offset;
slot->io_already_done = FALSE; slot->io_already_done = FALSE;
slot->space_id = space_id; slot->space_id = space_id;
slot->is_log = is_log;
slot->page_size = page_size;
if (message1) { if (message1) {
slot->file_block_size = fil_node_get_block_size(message1); slot->file_block_size = fil_node_get_block_size(message1);
...@@ -4934,6 +4936,7 @@ ibool ...@@ -4934,6 +4936,7 @@ ibool
os_aio_func( os_aio_func(
/*========*/ /*========*/
ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE */ ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE */
ulint is_log, /*!< in: 1 is OS_FILE_LOG or 0 */
ulint mode, /*!< in: OS_AIO_NORMAL, ..., possibly ORed ulint mode, /*!< in: OS_AIO_NORMAL, ..., possibly ORed
to OS_AIO_SIMULATED_WAKE_LATER: the to OS_AIO_SIMULATED_WAKE_LATER: the
last flag advises this function not to wake last flag advises this function not to wake
...@@ -4954,6 +4957,7 @@ os_aio_func( ...@@ -4954,6 +4957,7 @@ os_aio_func(
to write */ to write */
os_offset_t offset, /*!< in: file offset where to read or write */ os_offset_t offset, /*!< in: file offset where to read or write */
ulint n, /*!< in: number of bytes to read or write */ ulint n, /*!< in: number of bytes to read or write */
ulint page_size, /*!< in: page size in bytes */
fil_node_t* message1,/*!< in: message for the aio handler fil_node_t* message1,/*!< in: message for the aio handler
(can be used to identify a completed (can be used to identify a completed
aio operation); ignored if mode is aio operation); ignored if mode is
...@@ -5072,8 +5076,8 @@ os_aio_func( ...@@ -5072,8 +5076,8 @@ os_aio_func(
trx->io_read += n; trx->io_read += n;
} }
slot = os_aio_array_reserve_slot(type, array, message1, message2, file, slot = os_aio_array_reserve_slot(type, is_log, array, message1, message2, file,
name, buf, offset, n, space_id, name, buf, offset, n, page_size, space_id,
write_size); write_size);
if (type == OS_FILE_READ) { if (type == OS_FILE_READ) {
...@@ -5294,7 +5298,7 @@ os_aio_windows_handle( ...@@ -5294,7 +5298,7 @@ os_aio_windows_handle(
} }
if (slot->type == OS_FILE_WRITE) { if (slot->type == OS_FILE_WRITE) {
if (srv_use_trim && os_fallocate_failed == FALSE) { if (!slot->is_log && srv_use_trim && os_fallocate_failed == FALSE) {
// Deallocate unused blocks from file system // Deallocate unused blocks from file system
os_file_trim(slot); os_file_trim(slot);
} }
...@@ -5390,7 +5394,7 @@ os_aio_linux_collect( ...@@ -5390,7 +5394,7 @@ os_aio_linux_collect(
ut_a(slot->pos < end_pos); ut_a(slot->pos < end_pos);
if (slot->type == OS_FILE_WRITE) { if (slot->type == OS_FILE_WRITE) {
if (srv_use_trim && os_fallocate_failed == FALSE) { if (!slot->is_log && srv_use_trim && os_fallocate_failed == FALSE) {
// Deallocate unused blocks from file system // Deallocate unused blocks from file system
os_file_trim(slot); os_file_trim(slot);
} }
...@@ -6305,19 +6309,13 @@ os_file_trim( ...@@ -6305,19 +6309,13 @@ os_file_trim(
os_aio_slot_t* slot) /*!< in: slot structure */ os_aio_slot_t* slot) /*!< in: slot structure */
{ {
size_t len = slot->len; size_t len = slot->len;
size_t trim_len = UNIV_PAGE_SIZE - slot->len; size_t trim_len = slot->page_size - slot->len;
os_offset_t off __attribute__((unused)) = slot->offset + len; os_offset_t off __attribute__((unused)) = slot->offset + len;
size_t bsize = slot->file_block_size; size_t bsize = slot->file_block_size;
// len here should be alligned to sector size
ut_ad((trim_len % bsize) == 0);
ut_ad((len % bsize) == 0);
ut_ad(bsize != 0);
ut_ad((off % bsize) == 0);
#ifdef UNIV_TRIM_DEBUG #ifdef UNIV_TRIM_DEBUG
fprintf(stderr, "Note: TRIM: write_size %lu trim_len %lu len %lu off %lu bz %lu\n", fprintf(stderr, "Note: TRIM: write_size %lu trim_len %lu len %lu off %lu bz %lu\n",
*slot->write_size, trim_len, len, off, bsize); slot->write_size ? *slot->write_size : 0, trim_len, len, off, bsize);
#endif #endif
// Nothing to do if trim length is zero or if actual write // Nothing to do if trim length is zero or if actual write
...@@ -6332,22 +6330,19 @@ os_file_trim( ...@@ -6332,22 +6330,19 @@ os_file_trim(
*slot->write_size > 0 && *slot->write_size > 0 &&
len >= *slot->write_size)) { len >= *slot->write_size)) {
#ifdef UNIV_PAGECOMPRESS_DEBUG if (slot->write_size) {
fprintf(stderr, "Note: TRIM: write_size %lu trim_len %lu len %lu\n", if (*slot->write_size > 0 && len >= *slot->write_size) {
*slot->write_size, trim_len, len); srv_stats.page_compressed_trim_op_saved.inc();
#endif }
if (*slot->write_size > 0 && len >= *slot->write_size) { *slot->write_size = len;
srv_stats.page_compressed_trim_op_saved.inc();
} }
*slot->write_size = len;
return (TRUE); return (TRUE);
} }
#ifdef __linux__ #ifdef __linux__
#if defined(HAVE_FALLOCATE) #if defined(HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE)
int ret = fallocate(slot->file, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, off, trim_len); int ret = fallocate(slot->file, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, off, trim_len);
if (ret) { if (ret) {
...@@ -6385,7 +6380,7 @@ os_file_trim( ...@@ -6385,7 +6380,7 @@ os_file_trim(
*slot->write_size = 0; *slot->write_size = 0;
} }
#endif /* HAVE_FALLOCATE ... */ #endif /* HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE ... */
#elif defined(_WIN32) #elif defined(_WIN32)
FILE_LEVEL_TRIM flt; FILE_LEVEL_TRIM flt;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment