Commit 22b62eda authored by Marko Mäkelä's avatar Marko Mäkelä

MDEV-25113: Make page flushing faster

buf_page_write_complete(): Reduce the buf_pool.mutex hold time,
and do not acquire buf_pool.flush_list_mutex at all.
Instead, mark blocks clean by setting oldest_modification to 1.
Dirty pages of temporary tables will be identified by the special
value 2 instead of the previous special value 1.
(By design of the ib_logfile0 format, actual LSN values smaller
than 2048 are not possible.)

buf_LRU_free_page(), buf_pool_t::get_oldest_modification()
and many other functions will remove the garbage (clean blocks)
from buf_pool.flush_list while holding buf_pool.flush_list_mutex.

buf_pool_t::n_flush_LRU, buf_pool_t::n_flush_list:
Replaced with non-atomic variables, protected by buf_pool.mutex,
to avoid unnecessary synchronization when modifying the counts.

export_vars: Remove unnecessary indirection for
innodb_pages_created, innodb_pages_read, innodb_pages_written.
parent 8af53897
......@@ -7101,7 +7101,7 @@ static void btr_blob_free(buf_block_t *block, bool all, mtr_t *mtr)
mysql_mutex_lock(&buf_pool.mutex);
if (buf_page_t *bpage= buf_pool.page_hash_get_low(page_id, fold))
if(!buf_LRU_free_page(bpage, all) && all && bpage->zip.data)
if (!buf_LRU_free_page(bpage, all) && all && bpage->zip.data)
/* Attempt to deallocate the redundant copy of the uncompressed page
if the whole ROW_FORMAT=COMPRESSED block cannot be deallocted. */
buf_LRU_free_page(bpage, false);
......
......@@ -1346,13 +1346,15 @@ inline const buf_block_t *buf_pool_t::chunk_t::not_freed() const
break;
}
const lsn_t lsn= block->page.oldest_modification();
if (fsp_is_system_temporary(block->page.id().space()))
{
ut_ad(block->page.oldest_modification() <= 1);
ut_ad(lsn == 0 || lsn == 2);
break;
}
if (!block->page.ready_for_replace())
if (lsn > 1 || !block->page.can_relocate())
return block;
break;
......@@ -1509,9 +1511,9 @@ void buf_pool_t::close()
Only on aborted startup (with recovery) or with innodb_fast_shutdown=2
we may discard changes. */
ut_d(const lsn_t oldest= bpage->oldest_modification();)
ut_ad(!oldest || srv_is_being_started ||
srv_fast_shutdown == 2 ||
(oldest == 1 && fsp_is_system_temporary(bpage->id().space())));
ut_ad(fsp_is_system_temporary(bpage->id().space())
? (oldest == 0 || oldest == 2)
: oldest <= 1 || srv_is_being_started || srv_fast_shutdown == 2);
if (bpage->state() != BUF_BLOCK_FILE_PAGE)
buf_page_free_descriptor(bpage);
......@@ -3323,6 +3325,8 @@ buf_page_get_low(
mysql_mutex_unlock(&buf_pool.mutex);
buf_flush_list();
buf_flush_wait_batch_end_acquiring_mutex(false);
while (buf_flush_list_space(space));
os_aio_wait_until_no_pending_writes();
if (fix_block->page.buf_fix_count() == 1
&& !fix_block->page.oldest_modification()) {
......@@ -4438,8 +4442,8 @@ void buf_pool_t::print()
<< UT_LIST_GET_LEN(flush_list)
<< ", n pending decompressions=" << n_pend_unzip
<< ", n pending reads=" << n_pend_reads
<< ", n pending flush LRU=" << n_flush_LRU
<< " list=" << n_flush_list
<< ", n pending flush LRU=" << n_flush_LRU_
<< " list=" << n_flush_list_
<< ", pages made young=" << stat.n_pages_made_young
<< ", not young=" << stat.n_pages_not_made_young
<< ", pages read=" << stat.n_pages_read
......@@ -4538,7 +4542,6 @@ void buf_stats_get_pool_info(buf_pool_info_t *pool_info)
double time_elapsed;
mysql_mutex_lock(&buf_pool.mutex);
mysql_mutex_lock(&buf_pool.flush_list_mutex);
pool_info->pool_size = buf_pool.curr_size;
......@@ -4548,17 +4551,17 @@ void buf_stats_get_pool_info(buf_pool_info_t *pool_info)
pool_info->free_list_len = UT_LIST_GET_LEN(buf_pool.free);
mysql_mutex_lock(&buf_pool.flush_list_mutex);
pool_info->flush_list_len = UT_LIST_GET_LEN(buf_pool.flush_list);
pool_info->n_pend_unzip = UT_LIST_GET_LEN(buf_pool.unzip_LRU);
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
pool_info->n_pend_reads = buf_pool.n_pend_reads;
pool_info->n_pending_flush_lru = buf_pool.n_flush_LRU;
pool_info->n_pending_flush_list = buf_pool.n_flush_list;
pool_info->n_pending_flush_lru = buf_pool.n_flush_LRU_;
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
pool_info->n_pending_flush_list = buf_pool.n_flush_list_;
current_time = time(NULL);
time_elapsed = 0.001 + difftime(current_time,
......
This diff is collapsed.
......@@ -108,7 +108,7 @@ uint buf_LRU_old_threshold_ms;
/** Remove bpage from buf_pool.LRU and buf_pool.page_hash.
If bpage->state() == BUF_BLOCK_ZIP_PAGE && !bpage->oldest_modification(),
If bpage->state() == BUF_BLOCK_ZIP_PAGE && bpage->oldest_modification() <= 1,
the object will be freed.
@param bpage buffer block
......@@ -242,8 +242,8 @@ static bool buf_LRU_free_from_common_LRU_list(ulint limit)
buf_pool.lru_scan_itr.set(prev);
const auto accessed = bpage->is_accessed();
if (!bpage->oldest_modification()
&& buf_LRU_free_page(bpage, true)) {
if (buf_LRU_free_page(bpage, true)) {
if (!accessed) {
/* Keep track of pages that are evicted without
ever being accessed. This gives us a measure of
......@@ -449,8 +449,8 @@ buf_block_t* buf_LRU_get_free_block(bool have_mutex)
#ifndef DBUG_OFF
not_found:
#endif
buf_flush_wait_batch_end(true);
mysql_mutex_unlock(&buf_pool.mutex);
buf_flush_wait_batch_end_acquiring_mutex(true);
if (n_iterations > 20 && !buf_lru_free_blocks_error_printed
&& srv_buf_pool_old_size == srv_buf_pool_size) {
......@@ -801,20 +801,33 @@ bool buf_LRU_free_page(buf_page_t *bpage, bool zip)
const ulint fold = id.fold();
page_hash_latch* hash_lock = buf_pool.page_hash.lock_get(fold);
hash_lock->write_lock();
lsn_t oldest_modification = bpage->oldest_modification();
if (UNIV_UNLIKELY(!bpage->can_relocate())) {
/* Do not free buffer fixed and I/O-fixed blocks. */
goto func_exit;
}
if (oldest_modification == 1) {
mysql_mutex_lock(&buf_pool.flush_list_mutex);
oldest_modification = bpage->oldest_modification();
if (oldest_modification) {
ut_ad(oldest_modification == 1);
buf_pool.delete_from_flush_list(bpage);
}
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
ut_ad(!bpage->oldest_modification());
oldest_modification = 0;
}
if (zip || !bpage->zip.data) {
/* This would completely free the block. */
/* Do not completely free dirty blocks. */
if (bpage->oldest_modification()) {
if (oldest_modification) {
goto func_exit;
}
} else if (bpage->oldest_modification()
} else if (oldest_modification
&& bpage->state() != BUF_BLOCK_FILE_PAGE) {
func_exit:
hash_lock->write_unlock();
......
......@@ -40,11 +40,6 @@ Created 1/8/1996 Heikki Tuuri
#include "sql_table.h"
#include <mysql/service_thd_mdl.h>
#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
/** Flag to control insert buffer debugging. */
extern uint ibuf_debug;
#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
#include "btr0btr.h"
#include "btr0cur.h"
#include "btr0sea.h"
......
......@@ -1892,6 +1892,9 @@ fil_crypt_rotate_page(
if (block->page.status == buf_page_t::FREED) {
/* Do not modify freed pages to avoid an assertion
failure on recovery.*/
} else if (block->page.oldest_modification() > 1) {
/* Do not unnecessarily touch pages that are
already dirty. */
} else if (space->is_stopping()) {
/* The tablespace is closing (in DROP TABLE or
TRUNCATE TABLE or similar): avoid further access */
......
......@@ -955,9 +955,9 @@ static SHOW_VAR innodb_status_variables[]= {
SHOW_SIZE_T},
{"os_log_written", &export_vars.innodb_os_log_written, SHOW_SIZE_T},
{"page_size", &srv_page_size, SHOW_ULONG},
{"pages_created", &export_vars.innodb_pages_created, SHOW_SIZE_T},
{"pages_read", &export_vars.innodb_pages_read, SHOW_SIZE_T},
{"pages_written", &export_vars.innodb_pages_written, SHOW_SIZE_T},
{"pages_created", &buf_pool.stat.n_pages_created, SHOW_SIZE_T},
{"pages_read", &buf_pool.stat.n_pages_read, SHOW_SIZE_T},
{"pages_written", &buf_pool.stat.n_pages_written, SHOW_SIZE_T},
{"row_lock_current_waits", &export_vars.innodb_row_lock_current_waits,
SHOW_SIZE_T},
{"row_lock_time", &export_vars.innodb_row_lock_time, SHOW_LONGLONG},
......
......@@ -763,6 +763,16 @@ class buf_page_t
/** Count of how manyfold this block is currently bufferfixed. */
Atomic_counter<uint32_t> buf_fix_count_;
/** log sequence number of the START of the log entry written of the
oldest modification to this block which has not yet been written
to the data file;
0 if no modifications are pending;
1 if no modifications are pending, but the block is in buf_pool.flush_list;
2 if modifications are pending, but the block is not in buf_pool.flush_list
(because id().space() is the temporary tablespace). */
Atomic_counter<lsn_t> oldest_modification_;
/** type of pending I/O operation; protected by buf_pool.mutex
if in_LRU_list */
Atomic_relaxed<buf_io_fix> io_fix_;
......@@ -812,12 +822,6 @@ class buf_page_t
or if state() is BUF_BLOCK_MEMORY or BUF_BLOCK_REMOVE_HASH. */
UT_LIST_NODE_T(buf_page_t) list;
private:
/** log sequence number of the START of the log entry written of the
oldest modification to this block which has not yet been written
to the data file; 0 if no modifications are pending. */
Atomic_counter<lsn_t> oldest_modification_;
public:
/** @name LRU replacement algorithm fields.
Protected by buf_pool.mutex. */
/* @{ */
......@@ -932,12 +936,19 @@ class buf_page_t
inline void set_io_fix(buf_io_fix io_fix);
inline void set_corrupt_id();
/** @return the oldest modification */
/** @return the log sequence number of the oldest pending modification
@retval 0 if the block is not in buf_pool.flush_list
@retval 1 if the block is in buf_pool.flush_list but not modified
@retval 2 if the block belongs to the temporary tablespace and
has unwritten changes */
lsn_t oldest_modification() const { return oldest_modification_; }
/** Set oldest_modification when adding to buf_pool.flush_list */
inline void set_oldest_modification(lsn_t lsn);
/** Clear oldest_modification when removing from buf_pool.flush_list */
inline void clear_oldest_modification();
/** Note that a block is no longer dirty, while not removing
it from buf_pool.flush_list */
inline void clear_oldest_modification(bool temporary);
/** Notify that a page in a temporary tablespace has been modified. */
void set_temp_modified()
......@@ -945,7 +956,7 @@ class buf_page_t
ut_ad(fsp_is_system_temporary(id().space()));
ut_ad(state() == BUF_BLOCK_FILE_PAGE);
ut_ad(!oldest_modification());
oldest_modification_= 1;
oldest_modification_= 2;
}
/** Prepare to release a file page to buf_pool.free. */
......@@ -1562,23 +1573,24 @@ class buf_pool_t
bool is_block_lock(const rw_lock_t *l) const
{ return is_block_field(static_cast<const void*>(l)); }
/** @return the block that was made dirty the longest time ago */
const buf_page_t *get_oldest_modified() const
{
mysql_mutex_assert_owner(&flush_list_mutex);
const buf_page_t *bpage= UT_LIST_GET_LAST(flush_list);
ut_ad(!bpage || !fsp_is_system_temporary(bpage->id().space()));
ut_ad(!bpage || bpage->oldest_modification());
return bpage;
}
/**
@return the smallest oldest_modification lsn for any page
@retval empty_lsn if all modified persistent pages have been flushed */
lsn_t get_oldest_modification(lsn_t empty_lsn) const
lsn_t get_oldest_modification(lsn_t empty_lsn)
{
const buf_page_t *bpage= get_oldest_modified();
return bpage ? bpage->oldest_modification() : empty_lsn;
mysql_mutex_assert_owner(&flush_list_mutex);
while (buf_page_t *bpage= UT_LIST_GET_LAST(flush_list))
{
ut_ad(!fsp_is_system_temporary(bpage->id().space()));
lsn_t lsn= bpage->oldest_modification();
if (lsn != 1)
{
ut_ad(lsn > 2);
return lsn;
}
delete_from_flush_list(bpage);
}
return empty_lsn;
}
/** Determine if a buffer block was created by chunk_t::create().
......@@ -1792,15 +1804,18 @@ class buf_pool_t
/** Buffer pool mutex */
MY_ALIGNED(CPU_LEVEL1_DCACHE_LINESIZE) mysql_mutex_t mutex;
/** Number of pending LRU flush. */
Atomic_counter<ulint> n_flush_LRU;
/** Number of pending LRU flush; protected by mutex. */
ulint n_flush_LRU_;
/** broadcast when n_flush_LRU reaches 0; protected by mutex */
pthread_cond_t done_flush_LRU;
/** Number of pending flush_list flush. */
Atomic_counter<ulint> n_flush_list;
/** Number of pending flush_list flush; protected by mutex */
ulint n_flush_list_;
/** broadcast when n_flush_list reaches 0; protected by mutex */
pthread_cond_t done_flush_list;
TPOOL_SUPPRESS_TSAN ulint n_flush_LRU() const { return n_flush_LRU_; }
TPOOL_SUPPRESS_TSAN ulint n_flush_list() const { return n_flush_list_; }
/** @name General fields */
/* @{ */
ulint curr_pool_size; /*!< Current pool size in bytes */
......@@ -1975,8 +1990,8 @@ class buf_pool_t
last_activity_count= activity_count;
}
// n_flush_LRU + n_flush_list is approximately COUNT(io_fix()==BUF_IO_WRITE)
// in flush_list
// n_flush_LRU() + n_flush_list()
// is approximately COUNT(io_fix()==BUF_IO_WRITE) in flush_list
unsigned freed_page_clock;/*!< a sequence number used
to count the number of buffer
......@@ -2061,13 +2076,35 @@ class buf_pool_t
/** @return whether any I/O is pending */
bool any_io_pending() const
{
return n_pend_reads || n_flush_LRU || n_flush_list;
return n_pend_reads || n_flush_LRU() || n_flush_list();
}
/** @return total amount of pending I/O */
ulint io_pending() const
{
return n_pend_reads + n_flush_LRU + n_flush_list;
return n_pend_reads + n_flush_LRU() + n_flush_list();
}
private:
/** Remove a block from the flush list. */
inline void delete_from_flush_list_low(buf_page_t *bpage);
/** Remove a block from flush_list.
@param bpage buffer pool page
@param clear whether to invoke buf_page_t::clear_oldest_modification() */
void delete_from_flush_list(buf_page_t *bpage, bool clear);
public:
/** Remove a block from flush_list.
@param bpage buffer pool page */
void delete_from_flush_list(buf_page_t *bpage)
{ delete_from_flush_list(bpage, true); }
/** Insert a modified block into the flush list.
@param block modified block
@param lsn start LSN of the mini-transaction that modified the block */
void insert_into_flush_list(buf_block_t *block, lsn_t lsn);
/** Free a page whose underlying file page has been freed. */
inline void release_freed_page(buf_page_t *bpage);
private:
/** Temporary memory for page_compressed and encrypted I/O */
struct io_buf_t
......@@ -2180,7 +2217,7 @@ inline void buf_page_t::set_corrupt_id()
switch (oldest_modification()) {
case 0:
break;
case 1:
case 2:
ut_ad(fsp_is_system_temporary(id().space()));
ut_d(oldest_modification_= 0); /* for buf_LRU_block_free_non_file_page() */
break;
......@@ -2206,7 +2243,7 @@ inline void buf_page_t::set_corrupt_id()
inline void buf_page_t::set_oldest_modification(lsn_t lsn)
{
mysql_mutex_assert_owner(&buf_pool.flush_list_mutex);
ut_ad(!oldest_modification());
ut_ad(oldest_modification() <= 1);
oldest_modification_= lsn;
}
......@@ -2221,13 +2258,27 @@ inline void buf_page_t::clear_oldest_modification()
oldest_modification_= 0;
}
/** Note that a block is no longer dirty, while not removing
it from buf_pool.flush_list */
inline void buf_page_t::clear_oldest_modification(bool temporary)
{
mysql_mutex_assert_not_owner(&buf_pool.flush_list_mutex);
ut_ad(temporary == fsp_is_system_temporary(id().space()));
ut_ad(io_fix_ == BUF_IO_WRITE);
ut_ad(temporary ? oldest_modification() == 2 : oldest_modification() > 2);
oldest_modification_= !temporary;
}
/** @return whether the block is modified and ready for flushing */
inline bool buf_page_t::ready_for_flush() const
{
mysql_mutex_assert_owner(&buf_pool.mutex);
ut_ad(in_LRU_list);
ut_a(in_file());
return oldest_modification() && io_fix_ == BUF_IO_NONE;
ut_ad(fsp_is_system_temporary(id().space())
? oldest_modification() == 2
: oldest_modification() > 2);
return io_fix_ == BUF_IO_NONE;
}
/** @return whether the block can be relocated in memory.
......@@ -2304,7 +2355,7 @@ MEMORY: is not in free list, LRU list, or flush list, nor page
hash table
FILE_PAGE: space and offset are defined, is in page hash table
if io_fix == BUF_IO_WRITE,
buf_pool.n_flush_LRU > 0 || buf_pool.n_flush_list > 0
buf_pool.n_flush_LRU() || buf_pool.n_flush_list()
(1) if buf_fix_count == 0, then
is in LRU list, not in free list
......
......@@ -95,7 +95,7 @@ ulint buf_flush_list(ulint max_n= ULINT_UNDEFINED, lsn_t lsn= LSN_MAX);
/** Try to flush dirty pages that belong to a given tablespace.
@param space tablespace
@param n_flushed number of pages written
@return whether any pages might not have been flushed */
@return whether the flush for some pages might not have been initiated */
bool buf_flush_list_space(fil_space_t *space, ulint *n_flushed= nullptr)
MY_ATTRIBUTE((warn_unused_result));
......
/*****************************************************************************
Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2019, 2020, MariaDB Corporation.
Copyright (c) 2019, 2021, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
......@@ -26,17 +26,7 @@ Created 11/5/1995 Heikki Tuuri
#include "assume_aligned.h"
#include "buf0buf.h"
#include "mtr0mtr.h"
#include "srv0srv.h"
#include "fsp0types.h"
/********************************************************************//**
Inserts a modified block into the flush list. */
void
buf_flush_insert_into_flush_list(
/*=============================*/
buf_block_t* block, /*!< in/out: block which is modified */
lsn_t lsn); /*!< in: oldest modification */
/********************************************************************//**
This function should be called at a mini-transaction commit, if a page was
......@@ -52,8 +42,7 @@ buf_flush_note_modification(
lsn_t end_lsn) /*!< in: end lsn of the mtr that
modified this block */
{
ut_ad(!srv_read_only_mode
|| fsp_is_system_temporary(block->page.id().space()));
ut_ad(!srv_read_only_mode);
ut_ad(block->page.state() == BUF_BLOCK_FILE_PAGE);
ut_ad(block->page.buf_fix_count());
ut_ad(mach_read_from_8(block->frame + FIL_PAGE_LSN) <= end_lsn);
......@@ -65,12 +54,12 @@ buf_flush_note_modification(
const lsn_t oldest_modification = block->page.oldest_modification();
if (oldest_modification) {
if (oldest_modification > 1) {
ut_ad(oldest_modification <= start_lsn);
} else if (!fsp_is_system_temporary(block->page.id().space())) {
buf_flush_insert_into_flush_list(block, start_lsn);
} else {
} else if (fsp_is_system_temporary(block->page.id().space())) {
block->page.set_temp_modified();
} else {
buf_pool.insert_into_flush_list(block, start_lsn);
}
srv_stats.buf_pool_write_requests.inc();
......
/*****************************************************************************
Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2017, 2020, MariaDB Corporation.
Copyright (c) 2017, 2021, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
......@@ -32,7 +32,7 @@ inline bool mtr_t::is_block_dirtied(const buf_block_t *block)
{
ut_ad(block->page.state() == BUF_BLOCK_FILE_PAGE);
ut_ad(block->page.buf_fix_count());
return !block->page.oldest_modification();
return block->page.oldest_modification() <= 1;
}
/**
......
......@@ -737,9 +737,6 @@ struct export_var_t{
ulint innodb_os_log_fsyncs; /*!< n_log_flushes */
ulint innodb_os_log_pending_writes; /*!< srv_os_log_pending_writes */
ulint innodb_os_log_pending_fsyncs; /*!< n_pending_log_flushes */
ulint innodb_pages_created; /*!< buf_pool.stat.n_pages_created */
ulint innodb_pages_read; /*!< buf_pool.stat.n_pages_read*/
ulint innodb_pages_written; /*!< buf_pool.stat.n_pages_written */
ulint innodb_row_lock_waits; /*!< srv_n_lock_wait_count */
ulint innodb_row_lock_current_waits; /*!< srv_n_lock_wait_current_count */
int64_t innodb_row_lock_time; /*!< srv_n_lock_wait_time
......
......@@ -1137,12 +1137,6 @@ srv_export_innodb_status(void)
export_vars.innodb_log_writes = srv_stats.log_writes;
export_vars.innodb_pages_created = buf_pool.stat.n_pages_created;
export_vars.innodb_pages_read = buf_pool.stat.n_pages_read;
export_vars.innodb_pages_written = buf_pool.stat.n_pages_written;
export_vars.innodb_row_lock_waits = srv_stats.n_lock_wait_count;
export_vars.innodb_row_lock_current_waits =
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment