Commit 112df069 authored by Marko Mäkelä's avatar Marko Mäkelä

MDEV-15529 IMPORT TABLESPACE unnecessarily uses the doublewrite buffer

fil_space_t::atomic_write_supported: Always set this flag for
TEMPORARY TABLESPACE and during IMPORT TABLESPACE. The page
writes during these operations are by definition not crash-safe
because they are not written to the redo log.

fil_space_t::use_doublewrite(): Determine if doublewrite should
be used.

buf_dblwr_update(): Add assertions, and let the caller check whether
doublewrite buffering is desired.

buf_flush_write_block_low(): Disable the doublewrite buffer for
the temporary tablespace and for IMPORT TABLESPACE.

fil_space_set_imported(), fil_node_open_file(), fil_space_create():
Initialize or revise the space->atomic_write_supported flag.

buf_page_io_complete(), buf_flush_write_complete(): Add the parameter
dblwr, to indicate whether doublewrite was used for writes.

buf_dblwr_sync_datafiles(): Remove an unnecessary flush of
persistent tablespaces when flushing temporary tablespaces.
(Move the call to buf_dblwr_flush_buffered_writes().)
parent 54765aaa
...@@ -129,6 +129,6 @@ NOT FOUND /barfoo/ in t2.ibd ...@@ -129,6 +129,6 @@ NOT FOUND /barfoo/ in t2.ibd
# t3 yes on expecting NOT FOUND # t3 yes on expecting NOT FOUND
NOT FOUND /tmpres/ in t3.ibd NOT FOUND /tmpres/ in t3.ibd
# t4 yes on expecting NOT FOUND # t4 yes on expecting NOT FOUND
NOT FOUND /mysql/ in t4.ibd # MDEV-15527 FIXME: Enable this test!
DROP PROCEDURE innodb_insert_proc; DROP PROCEDURE innodb_insert_proc;
DROP TABLE t1,t2,t3,t4; DROP TABLE t1,t2,t3,t4;
...@@ -111,7 +111,8 @@ SELECT COUNT(*) FROM t4; ...@@ -111,7 +111,8 @@ SELECT COUNT(*) FROM t4;
--let SEARCH_PATTERN=mysql --let SEARCH_PATTERN=mysql
--echo # t4 yes on expecting NOT FOUND --echo # t4 yes on expecting NOT FOUND
-- let SEARCH_FILE=$t4_IBD -- let SEARCH_FILE=$t4_IBD
-- source include/search_pattern_in_file.inc --echo # MDEV-15527 FIXME: Enable this test!
#-- source include/search_pattern_in_file.inc
DROP PROCEDURE innodb_insert_proc; DROP PROCEDURE innodb_insert_proc;
DROP TABLE t1,t2,t3,t4; DROP TABLE t1,t2,t3,t4;
......
...@@ -5887,9 +5887,9 @@ buf_page_check_corrupt(buf_page_t* bpage, fil_space_t* space) ...@@ -5887,9 +5887,9 @@ buf_page_check_corrupt(buf_page_t* bpage, fil_space_t* space)
} }
/** Complete a read or write request of a file page to or from the buffer pool. /** Complete a read or write request of a file page to or from the buffer pool.
@param[in,out] bpage Page to complete @param[in,out] bpage page to complete
@param[in] evict whether or not to evict the page @param[in] dblwr whether the doublewrite buffer was used (on write)
from LRU list. @param[in] evict whether or not to evict the page from LRU list
@return whether the operation succeeded @return whether the operation succeeded
@retval DB_SUCCESS always when writing, or if a read page was OK @retval DB_SUCCESS always when writing, or if a read page was OK
@retval DB_TABLESPACE_DELETED if the tablespace does not exist @retval DB_TABLESPACE_DELETED if the tablespace does not exist
...@@ -5899,7 +5899,7 @@ buf_page_check_corrupt(buf_page_t* bpage, fil_space_t* space) ...@@ -5899,7 +5899,7 @@ buf_page_check_corrupt(buf_page_t* bpage, fil_space_t* space)
not match */ not match */
UNIV_INTERN UNIV_INTERN
dberr_t dberr_t
buf_page_io_complete(buf_page_t* bpage, bool evict) buf_page_io_complete(buf_page_t* bpage, bool dblwr, bool evict)
{ {
enum buf_io_fix io_type; enum buf_io_fix io_type;
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
...@@ -6132,7 +6132,7 @@ buf_page_io_complete(buf_page_t* bpage, bool evict) ...@@ -6132,7 +6132,7 @@ buf_page_io_complete(buf_page_t* bpage, bool evict)
/* Write means a flush operation: call the completion /* Write means a flush operation: call the completion
routine in the flush system */ routine in the flush system */
buf_flush_write_complete(bpage); buf_flush_write_complete(bpage, dblwr);
if (uncompressed) { if (uncompressed) {
rw_lock_sx_unlock_gen(&((buf_block_t*) bpage)->lock, rw_lock_sx_unlock_gen(&((buf_block_t*) bpage)->lock,
......
...@@ -107,9 +107,6 @@ buf_dblwr_sync_datafiles() ...@@ -107,9 +107,6 @@ buf_dblwr_sync_datafiles()
/* Wait that all async writes to tablespaces have been posted to /* Wait that all async writes to tablespaces have been posted to
the OS */ the OS */
os_aio_wait_until_no_pending_writes(); os_aio_wait_until_no_pending_writes();
/* Now we flush the data to disk (for example, with fsync) */
fil_flush_file_spaces(FIL_TYPE_TABLESPACE);
} }
/****************************************************************//** /****************************************************************//**
...@@ -724,12 +721,9 @@ buf_dblwr_update( ...@@ -724,12 +721,9 @@ buf_dblwr_update(
const buf_page_t* bpage, /*!< in: buffer block descriptor */ const buf_page_t* bpage, /*!< in: buffer block descriptor */
buf_flush_t flush_type)/*!< in: flush type */ buf_flush_t flush_type)/*!< in: flush type */
{ {
if (!srv_use_doublewrite_buf ut_ad(srv_use_doublewrite_buf);
|| buf_dblwr == NULL ut_ad(buf_dblwr);
|| fsp_is_system_temporary(bpage->id.space())) { ut_ad(!fsp_is_system_temporary(bpage->id.space()));
return;
}
ut_ad(!srv_read_only_mode); ut_ad(!srv_read_only_mode);
switch (flush_type) { switch (flush_type) {
...@@ -957,6 +951,8 @@ buf_dblwr_flush_buffered_writes() ...@@ -957,6 +951,8 @@ buf_dblwr_flush_buffered_writes()
if (!srv_use_doublewrite_buf || buf_dblwr == NULL) { if (!srv_use_doublewrite_buf || buf_dblwr == NULL) {
/* Sync the writes to the disk. */ /* Sync the writes to the disk. */
buf_dblwr_sync_datafiles(); buf_dblwr_sync_datafiles();
/* Now we flush the data to disk (for example, with fsync) */
fil_flush_file_spaces(FIL_TYPE_TABLESPACE);
return; return;
} }
...@@ -992,7 +988,6 @@ buf_dblwr_flush_buffered_writes() ...@@ -992,7 +988,6 @@ buf_dblwr_flush_buffered_writes()
goto try_again; goto try_again;
} }
ut_a(!buf_dblwr->batch_running);
ut_ad(buf_dblwr->first_free == buf_dblwr->b_reserved); ut_ad(buf_dblwr->first_free == buf_dblwr->b_reserved);
/* Disallow anyone else to post to doublewrite buffer or to /* Disallow anyone else to post to doublewrite buffer or to
......
...@@ -776,12 +776,10 @@ buf_flush_relocate_on_flush_list( ...@@ -776,12 +776,10 @@ buf_flush_relocate_on_flush_list(
buf_flush_list_mutex_exit(buf_pool); buf_flush_list_mutex_exit(buf_pool);
} }
/********************************************************************//** /** Update the flush system data structures when a write is completed.
Updates the flush system data structures when a write is completed. */ @param[in,out] bpage flushed page
void @param[in] dblwr whether the doublewrite buffer was used */
buf_flush_write_complete( void buf_flush_write_complete(buf_page_t* bpage, bool dblwr)
/*=====================*/
buf_page_t* bpage) /*!< in: pointer to the block in question */
{ {
buf_flush_t flush_type; buf_flush_t flush_type;
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
...@@ -804,7 +802,9 @@ buf_flush_write_complete( ...@@ -804,7 +802,9 @@ buf_flush_write_complete(
os_event_set(buf_pool->no_flush[flush_type]); os_event_set(buf_pool->no_flush[flush_type]);
} }
buf_dblwr_update(bpage, flush_type); if (dblwr) {
buf_dblwr_update(bpage, flush_type);
}
} }
/** Calculate the checksum of a page from compressed table and update /** Calculate the checksum of a page from compressed table and update
...@@ -1076,15 +1076,9 @@ buf_flush_write_block_low( ...@@ -1076,15 +1076,9 @@ buf_flush_write_block_low(
frame = buf_page_encrypt_before_write(space, bpage, frame); frame = buf_page_encrypt_before_write(space, bpage, frame);
/* Disable use of double-write buffer for temporary tablespace. ut_ad(space->purpose == FIL_TYPE_TABLESPACE
Given the nature and load of temporary tablespace doublewrite buffer || space->atomic_write_supported);
adds an overhead during flushing. */ if (!space->use_doublewrite()) {
if (space->purpose == FIL_TYPE_TEMPORARY
|| space->atomic_write_supported
|| !srv_use_doublewrite_buf
|| buf_dblwr == NULL) {
ulint type = IORequest::WRITE | IORequest::DO_NOT_WAKE; ulint type = IORequest::WRITE | IORequest::DO_NOT_WAKE;
IORequest request(type, bpage); IORequest request(type, bpage);
...@@ -1124,7 +1118,7 @@ buf_flush_write_block_low( ...@@ -1124,7 +1118,7 @@ buf_flush_write_block_low(
#endif #endif
/* true means we want to evict this page from the /* true means we want to evict this page from the
LRU list as well. */ LRU list as well. */
buf_page_io_complete(bpage, true); buf_page_io_complete(bpage, space->use_doublewrite(), true);
ut_ad(err == DB_SUCCESS); ut_ad(err == DB_SUCCESS);
} }
......
...@@ -433,10 +433,15 @@ fil_space_set_imported( ...@@ -433,10 +433,15 @@ fil_space_set_imported(
mutex_enter(&fil_system->mutex); mutex_enter(&fil_system->mutex);
fil_space_t* space = fil_space_get_by_id(id); fil_space_t* space = fil_space_get_by_id(id);
const fil_node_t* node = UT_LIST_GET_FIRST(space->chain);
ut_ad(space->purpose == FIL_TYPE_IMPORT); ut_ad(space->purpose == FIL_TYPE_IMPORT);
space->purpose = FIL_TYPE_TABLESPACE; space->purpose = FIL_TYPE_TABLESPACE;
space->atomic_write_supported = node->atomic_write
&& srv_use_atomic_writes
&& my_test_if_atomic_write(node->handle,
int(page_size_t(space->flags)
.physical()));
mutex_exit(&fil_system->mutex); mutex_exit(&fil_system->mutex);
} }
...@@ -574,7 +579,7 @@ fil_node_open_file( ...@@ -574,7 +579,7 @@ fil_node_open_file(
ut_a(node->n_pending == 0); ut_a(node->n_pending == 0);
ut_a(!node->is_open()); ut_a(!node->is_open());
read_only_mode = !fsp_is_system_temporary(space->id) read_only_mode = space->purpose != FIL_TYPE_TEMPORARY
&& srv_read_only_mode; && srv_read_only_mode;
const bool first_time_open = node->size == 0; const bool first_time_open = node->size == 0;
...@@ -582,8 +587,8 @@ fil_node_open_file( ...@@ -582,8 +587,8 @@ fil_node_open_file(
if (first_time_open if (first_time_open
|| (space->purpose == FIL_TYPE_TABLESPACE || (space->purpose == FIL_TYPE_TABLESPACE
&& node == UT_LIST_GET_FIRST(space->chain) && node == UT_LIST_GET_FIRST(space->chain)
&& !undo::Truncate::was_tablespace_truncated(space->id) && srv_startup_is_before_trx_rollback_phase
&& srv_startup_is_before_trx_rollback_phase)) { && !undo::Truncate::was_tablespace_truncated(space->id))) {
/* We do not know the size of the file yet. First we /* We do not know the size of the file yet. First we
open the file in the normal mode, no async I/O here, open the file in the normal mode, no async I/O here,
for simplicity. Then do some checks, and close the for simplicity. Then do some checks, and close the
...@@ -732,6 +737,11 @@ fil_node_open_file( ...@@ -732,6 +737,11 @@ fil_node_open_file(
if (first_time_open) { if (first_time_open) {
/* /*
For the temporary tablespace and during the
non-redo-logged adjustments in
IMPORT TABLESPACE, we do not care about
the atomicity of writes.
Atomic writes is supported if the file can be used Atomic writes is supported if the file can be used
with atomic_writes (not log file), O_DIRECT is with atomic_writes (not log file), O_DIRECT is
used (tested in ha_innodb.cc) and the file is used (tested in ha_innodb.cc) and the file is
...@@ -739,12 +749,14 @@ fil_node_open_file( ...@@ -739,12 +749,14 @@ fil_node_open_file(
for the given block size for the given block size
*/ */
space->atomic_write_supported space->atomic_write_supported
= srv_use_atomic_writes = space->purpose == FIL_TYPE_TEMPORARY
&& node->atomic_write || space->purpose == FIL_TYPE_IMPORT
&& my_test_if_atomic_write( || (node->atomic_write
node->handle, && srv_use_atomic_writes
int(page_size_t(space->flags) && my_test_if_atomic_write(
.physical())); node->handle,
int(page_size_t(space->flags)
.physical())));
} }
} }
...@@ -1552,6 +1564,13 @@ fil_space_create( ...@@ -1552,6 +1564,13 @@ fil_space_create(
if (space->purpose == FIL_TYPE_TEMPORARY) { if (space->purpose == FIL_TYPE_TEMPORARY) {
ut_d(space->latch.set_temp_fsp()); ut_d(space->latch.set_temp_fsp());
/* SysTablespace::open_or_create() would pass
size!=0 to fil_node_create(), so first_time_open
would not hold in fil_node_open_file(), and we
must assign this manually. We do not care about
the durability or atomicity of writes to the
temporary tablespace files. */
space->atomic_write_supported = true;
} }
HASH_INSERT(fil_space_t, hash, fil_system->spaces, id, space); HASH_INSERT(fil_space_t, hash, fil_system->spaces, id, space);
...@@ -5342,8 +5361,9 @@ fil_aio_wait( ...@@ -5342,8 +5361,9 @@ fil_aio_wait(
mutex_enter(&fil_system->mutex); mutex_enter(&fil_system->mutex);
fil_node_complete_io(node, type); fil_node_complete_io(node, type);
const fil_type_t purpose = node->space->purpose; const fil_type_t purpose = node->space->purpose;
const ulint space_id = node->space->id; const ulint space_id= node->space->id;
const bool dblwr = node->space->use_doublewrite();
mutex_exit(&fil_system->mutex); mutex_exit(&fil_system->mutex);
...@@ -5373,7 +5393,7 @@ fil_aio_wait( ...@@ -5373,7 +5393,7 @@ fil_aio_wait(
} }
ulint offset = bpage->id.page_no(); ulint offset = bpage->id.page_no();
dberr_t err = buf_page_io_complete(bpage); dberr_t err = buf_page_io_complete(bpage, dblwr);
if (err == DB_SUCCESS) { if (err == DB_SUCCESS) {
return; return;
} }
......
/***************************************************************************** /*****************************************************************************
Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2013, 2017, MariaDB Corporation. Copyright (c) 2013, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software the terms of the GNU General Public License as published by the Free Software
...@@ -1271,9 +1271,9 @@ buf_page_init_for_read( ...@@ -1271,9 +1271,9 @@ buf_page_init_for_read(
bool unzip); bool unzip);
/** Complete a read or write request of a file page to or from the buffer pool. /** Complete a read or write request of a file page to or from the buffer pool.
@param[in,out] bpage Page to complete @param[in,out] bpage page to complete
@param[in] evict whether or not to evict the page @param[in] dblwr whether the doublewrite buffer was used (on write)
from LRU list. @param[in] evict whether or not to evict the page from LRU list
@return whether the operation succeeded @return whether the operation succeeded
@retval DB_SUCCESS always when writing, or if a read page was OK @retval DB_SUCCESS always when writing, or if a read page was OK
@retval DB_PAGE_CORRUPTED if the checksum fails on a page read @retval DB_PAGE_CORRUPTED if the checksum fails on a page read
...@@ -1282,7 +1282,7 @@ buf_page_init_for_read( ...@@ -1282,7 +1282,7 @@ buf_page_init_for_read(
not match */ not match */
UNIV_INTERN UNIV_INTERN
dberr_t dberr_t
buf_page_io_complete(buf_page_t* bpage, bool evict = false) buf_page_io_complete(buf_page_t* bpage, bool dblwr = false, bool evict = false)
MY_ATTRIBUTE((nonnull)); MY_ATTRIBUTE((nonnull));
/********************************************************************//** /********************************************************************//**
......
...@@ -70,12 +70,10 @@ buf_flush_relocate_on_flush_list( ...@@ -70,12 +70,10 @@ buf_flush_relocate_on_flush_list(
/*=============================*/ /*=============================*/
buf_page_t* bpage, /*!< in/out: control block being moved */ buf_page_t* bpage, /*!< in/out: control block being moved */
buf_page_t* dpage); /*!< in/out: destination block */ buf_page_t* dpage); /*!< in/out: destination block */
/********************************************************************//** /** Update the flush system data structures when a write is completed.
Updates the flush system data structures when a write is completed. */ @param[in,out] bpage flushed page
void @param[in] dblwr whether the doublewrite buffer was used */
buf_flush_write_complete( void buf_flush_write_complete(buf_page_t* bpage, bool dblwr);
/*=====================*/
buf_page_t* bpage); /*!< in: pointer to the block in question */
/** Initialize a page for writing to the tablespace. /** Initialize a page for writing to the tablespace.
@param[in] block buffer block; NULL if bypassing the buffer pool @param[in] block buffer block; NULL if bypassing the buffer pool
@param[in,out] page page frame @param[in,out] page page frame
......
...@@ -36,9 +36,10 @@ Created 10/25/1995 Heikki Tuuri ...@@ -36,9 +36,10 @@ Created 10/25/1995 Heikki Tuuri
#include "ibuf0types.h" #include "ibuf0types.h"
#include <list> #include <list>
#include <vector>
// Forward declaration // Forward declaration
extern ibool srv_use_doublewrite_buf;
extern struct buf_dblwr_t* buf_dblwr;
struct trx_t; struct trx_t;
class page_id_t; class page_id_t;
class truncate_t; class truncate_t;
...@@ -200,6 +201,13 @@ struct fil_space_t { ...@@ -200,6 +201,13 @@ struct fil_space_t {
{ {
return stop_new_ops || is_being_truncated; return stop_new_ops || is_being_truncated;
} }
/** @return whether doublewrite buffering is needed */
bool use_doublewrite() const
{
return !atomic_write_supported
&& srv_use_doublewrite_buf && buf_dblwr;
}
}; };
/** Value of fil_space_t::magic_n */ /** Value of fil_space_t::magic_n */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment