Commit 759deaa0 authored by Marko Mäkelä's avatar Marko Mäkelä

MDEV-26010 fixup: Use acquire/release memory order

In commit 5f22511e we depend on
Total Store Ordering. For correct operation on ISAs that implement
weaker memory ordering, we must explicitly use release/acquire stores
and loads on buf_page_t::oldest_modification_ to prevent a race condition
when buf_page_t::list does not happen to be on the same cache line.

buf_page_t::clear_oldest_modification(): Assert that the block is
not in buf_pool.flush_list, and use std::memory_order_release.

buf_page_t::oldest_modification_acquire(): Read oldest_modification_
with std::memory_order_acquire. In this way, if the return value is 0,
the caller may safely assume that it will not observe the buf_page_t
as being in buf_pool.flush_list, even if it is not holding
buf_pool.flush_list_mutex.

buf_flush_relocate_on_flush_list(), buf_LRU_free_page():
Invoke buf_page_t::oldest_modification_acquire().
parent 5f22511e
...@@ -43,9 +43,10 @@ template <typename Type> class Atomic_relaxed ...@@ -43,9 +43,10 @@ template <typename Type> class Atomic_relaxed
Type load(std::memory_order o= std::memory_order_relaxed) const Type load(std::memory_order o= std::memory_order_relaxed) const
{ return m.load(o); } { return m.load(o); }
void store(Type i, std::memory_order o= std::memory_order_relaxed)
{ m.store(i, o); }
operator Type() const { return m.load(); } operator Type() const { return m.load(); }
Type operator=(const Type val) Type operator=(const Type i) { store(i); return i; }
{ m.store(val, std::memory_order_relaxed); return val; }
Type operator=(const Atomic_relaxed<Type> &rhs) { return *this= Type{rhs}; } Type operator=(const Atomic_relaxed<Type> &rhs) { return *this= Type{rhs}; }
Type fetch_add(const Type i, std::memory_order o= std::memory_order_relaxed) Type fetch_add(const Type i, std::memory_order o= std::memory_order_relaxed)
{ return m.fetch_add(i, o); } { return m.fetch_add(i, o); }
......
...@@ -1333,21 +1333,19 @@ inline const buf_block_t *buf_pool_t::chunk_t::not_freed() const ...@@ -1333,21 +1333,19 @@ inline const buf_block_t *buf_pool_t::chunk_t::not_freed() const
/* Skip blocks that are not being used for file pages. */ /* Skip blocks that are not being used for file pages. */
break; break;
case BUF_BLOCK_FILE_PAGE: case BUF_BLOCK_FILE_PAGE:
const lsn_t lsn= block->page.oldest_modification();
if (srv_read_only_mode) if (srv_read_only_mode)
{ {
/* The page cleaner is disabled in read-only mode. No pages /* The page cleaner is disabled in read-only mode. No pages
can be dirtied, so all of them must be clean. */ can be dirtied, so all of them must be clean. */
ut_d(lsn_t oldest_modification= block->page.oldest_modification()); ut_ad(lsn == 0 || lsn == recv_sys.recovered_lsn ||
ut_ad(oldest_modification == 0 ||
oldest_modification == recv_sys.recovered_lsn ||
srv_force_recovery == SRV_FORCE_NO_LOG_REDO); srv_force_recovery == SRV_FORCE_NO_LOG_REDO);
ut_ad(!block->page.buf_fix_count()); ut_ad(!block->page.buf_fix_count());
ut_ad(block->page.io_fix() == BUF_IO_NONE); ut_ad(block->page.io_fix() == BUF_IO_NONE);
break; break;
} }
const lsn_t lsn= block->page.oldest_modification();
if (fsp_is_system_temporary(block->page.id().space())) if (fsp_is_system_temporary(block->page.id().space()))
{ {
ut_ad(lsn == 0 || lsn == 2); ut_ad(lsn == 0 || lsn == 2);
......
...@@ -289,7 +289,7 @@ buf_flush_relocate_on_flush_list( ...@@ -289,7 +289,7 @@ buf_flush_relocate_on_flush_list(
mysql_mutex_assert_owner(&buf_pool.mutex); mysql_mutex_assert_owner(&buf_pool.mutex);
ut_ad(!fsp_is_system_temporary(bpage->id().space())); ut_ad(!fsp_is_system_temporary(bpage->id().space()));
const lsn_t lsn = bpage->oldest_modification(); const lsn_t lsn = bpage->oldest_modification_acquire();
if (!lsn) { if (!lsn) {
return; return;
...@@ -317,13 +317,13 @@ buf_flush_relocate_on_flush_list( ...@@ -317,13 +317,13 @@ buf_flush_relocate_on_flush_list(
/* bpage was removed from buf_pool.flush_list /* bpage was removed from buf_pool.flush_list
since we last checked, and before we acquired since we last checked, and before we acquired
buf_pool.flush_list_mutex. */ buf_pool.flush_list_mutex. */
dpage->list.prev = nullptr;
dpage->list.next = nullptr;
goto was_clean; goto was_clean;
} }
if (lsn == 1) { if (lsn == 1) {
was_clean: was_clean:
dpage->list.prev = nullptr;
dpage->list.next = nullptr;
dpage->clear_oldest_modification(); dpage->clear_oldest_modification();
} else if (prev) { } else if (prev) {
ut_ad(prev->oldest_modification()); ut_ad(prev->oldest_modification());
...@@ -802,14 +802,15 @@ inline void buf_pool_t::release_freed_page(buf_page_t *bpage) ...@@ -802,14 +802,15 @@ inline void buf_pool_t::release_freed_page(buf_page_t *bpage)
bpage->set_io_fix(BUF_IO_NONE); bpage->set_io_fix(BUF_IO_NONE);
bpage->status= buf_page_t::NORMAL; bpage->status= buf_page_t::NORMAL;
mysql_mutex_lock(&flush_list_mutex); mysql_mutex_lock(&flush_list_mutex);
ut_d(const lsn_t oldest_modification= bpage->oldest_modification();)
if (fsp_is_system_temporary(bpage->id().space())) if (fsp_is_system_temporary(bpage->id().space()))
{ {
ut_ad(uncompressed); ut_ad(uncompressed);
ut_ad(bpage->oldest_modification() == 2); ut_ad(oldest_modification == 2);
} }
else else
{ {
ut_ad(bpage->oldest_modification() > 2); ut_ad(oldest_modification > 2);
delete_from_flush_list(bpage, false); delete_from_flush_list(bpage, false);
} }
bpage->clear_oldest_modification(); bpage->clear_oldest_modification();
...@@ -838,6 +839,7 @@ static bool buf_flush_page(buf_page_t *bpage, bool lru, fil_space_t *space) ...@@ -838,6 +839,7 @@ static bool buf_flush_page(buf_page_t *bpage, bool lru, fil_space_t *space)
ut_ad(space->purpose == FIL_TYPE_TABLESPACE || ut_ad(space->purpose == FIL_TYPE_TABLESPACE ||
space->atomic_write_supported); space->atomic_write_supported);
ut_ad(space->referenced()); ut_ad(space->referenced());
ut_ad(lru || space != fil_system.temp_space);
rw_lock_t *rw_lock; rw_lock_t *rw_lock;
...@@ -881,7 +883,10 @@ static bool buf_flush_page(buf_page_t *bpage, bool lru, fil_space_t *space) ...@@ -881,7 +883,10 @@ static bool buf_flush_page(buf_page_t *bpage, bool lru, fil_space_t *space)
lru ? "LRU" : "flush_list", lru ? "LRU" : "flush_list",
bpage->id().space(), bpage->id().page_no())); bpage->id().space(), bpage->id().page_no()));
ut_ad(bpage->io_fix() == BUF_IO_WRITE); ut_ad(bpage->io_fix() == BUF_IO_WRITE);
ut_ad(bpage->oldest_modification()); ut_d(const lsn_t oldest_modification= bpage->oldest_modification());
ut_ad(space == fil_system.temp_space
? oldest_modification == 2
: oldest_modification > 2);
ut_ad(bpage->state() == ut_ad(bpage->state() ==
(rw_lock ? BUF_BLOCK_FILE_PAGE : BUF_BLOCK_ZIP_PAGE)); (rw_lock ? BUF_BLOCK_FILE_PAGE : BUF_BLOCK_ZIP_PAGE));
ut_ad(ULINT_UNDEFINED > ut_ad(ULINT_UNDEFINED >
...@@ -946,6 +951,7 @@ static bool buf_flush_page(buf_page_t *bpage, bool lru, fil_space_t *space) ...@@ -946,6 +951,7 @@ static bool buf_flush_page(buf_page_t *bpage, bool lru, fil_space_t *space)
} }
ut_ad(status == bpage->status); ut_ad(status == bpage->status);
ut_ad(oldest_modification == bpage->oldest_modification());
if (status != buf_page_t::NORMAL || !space->use_doublewrite()) if (status != buf_page_t::NORMAL || !space->use_doublewrite())
{ {
...@@ -954,8 +960,7 @@ static bool buf_flush_page(buf_page_t *bpage, bool lru, fil_space_t *space) ...@@ -954,8 +960,7 @@ static bool buf_flush_page(buf_page_t *bpage, bool lru, fil_space_t *space)
const lsn_t lsn= mach_read_from_8(my_assume_aligned<8> const lsn_t lsn= mach_read_from_8(my_assume_aligned<8>
(FIL_PAGE_LSN + (frame ? frame (FIL_PAGE_LSN + (frame ? frame
: block->frame))); : block->frame)));
ut_ad(lsn); ut_ad(lsn >= oldest_modification);
ut_ad(lsn >= bpage->oldest_modification());
if (lsn > log_sys.get_flushed_lsn()) if (lsn > log_sys.get_flushed_lsn())
log_write_up_to(lsn, true); log_write_up_to(lsn, true);
} }
......
...@@ -810,7 +810,7 @@ bool buf_LRU_free_page(buf_page_t *bpage, bool zip) ...@@ -810,7 +810,7 @@ bool buf_LRU_free_page(buf_page_t *bpage, bool zip)
const ulint fold = id.fold(); const ulint fold = id.fold();
page_hash_latch* hash_lock = buf_pool.page_hash.lock_get(fold); page_hash_latch* hash_lock = buf_pool.page_hash.lock_get(fold);
hash_lock->write_lock(); hash_lock->write_lock();
lsn_t oldest_modification = bpage->oldest_modification(); lsn_t oldest_modification = bpage->oldest_modification_acquire();
if (UNIV_UNLIKELY(!bpage->can_relocate())) { if (UNIV_UNLIKELY(!bpage->can_relocate())) {
/* Do not free buffer fixed and I/O-fixed blocks. */ /* Do not free buffer fixed and I/O-fixed blocks. */
......
...@@ -771,7 +771,7 @@ class buf_page_t ...@@ -771,7 +771,7 @@ class buf_page_t
1 if no modifications are pending, but the block is in buf_pool.flush_list; 1 if no modifications are pending, but the block is in buf_pool.flush_list;
2 if modifications are pending, but the block is not in buf_pool.flush_list 2 if modifications are pending, but the block is not in buf_pool.flush_list
(because id().space() is the temporary tablespace). */ (because id().space() is the temporary tablespace). */
Atomic_counter<lsn_t> oldest_modification_; Atomic_relaxed<lsn_t> oldest_modification_;
/** type of pending I/O operation; protected by buf_pool.mutex /** type of pending I/O operation; protected by buf_pool.mutex
if in_LRU_list */ if in_LRU_list */
...@@ -937,11 +937,18 @@ class buf_page_t ...@@ -937,11 +937,18 @@ class buf_page_t
inline void set_corrupt_id(); inline void set_corrupt_id();
/** @return the log sequence number of the oldest pending modification /** @return the log sequence number of the oldest pending modification
@retval 0 if the block is not in buf_pool.flush_list @retval 0 if the block is being removed from (or not in) buf_pool.flush_list
@retval 1 if the block is in buf_pool.flush_list but not modified @retval 1 if the block is in buf_pool.flush_list but not modified
@retval 2 if the block belongs to the temporary tablespace and @retval 2 if the block belongs to the temporary tablespace and
has unwritten changes */ has unwritten changes */
lsn_t oldest_modification() const { return oldest_modification_; } lsn_t oldest_modification() const { return oldest_modification_; }
/** @return the log sequence number of the oldest pending modification,
@retval 0 if the block is definitely not in buf_pool.flush_list
@retval 1 if the block is in buf_pool.flush_list but not modified
@retval 2 if the block belongs to the temporary tablespace and
has unwritten changes */
lsn_t oldest_modification_acquire() const
{ return oldest_modification_.load(std::memory_order_acquire); }
/** Set oldest_modification when adding to buf_pool.flush_list */ /** Set oldest_modification when adding to buf_pool.flush_list */
inline void set_oldest_modification(lsn_t lsn); inline void set_oldest_modification(lsn_t lsn);
/** Clear oldest_modification after removing from buf_pool.flush_list */ /** Clear oldest_modification after removing from buf_pool.flush_list */
...@@ -963,7 +970,8 @@ class buf_page_t ...@@ -963,7 +970,8 @@ class buf_page_t
void free_file_page() void free_file_page()
{ {
ut_ad(state() == BUF_BLOCK_REMOVE_HASH); ut_ad(state() == BUF_BLOCK_REMOVE_HASH);
ut_d(oldest_modification_= 0); /* for buf_LRU_block_free_non_file_page() */ /* buf_LRU_block_free_non_file_page() asserts !oldest_modification() */
ut_d(oldest_modification_= 0;)
set_corrupt_id(); set_corrupt_id();
ut_d(set_state(BUF_BLOCK_MEMORY)); ut_d(set_state(BUF_BLOCK_MEMORY));
} }
...@@ -2221,7 +2229,8 @@ inline void buf_page_t::set_corrupt_id() ...@@ -2221,7 +2229,8 @@ inline void buf_page_t::set_corrupt_id()
break; break;
case 2: case 2:
ut_ad(fsp_is_system_temporary(id().space())); ut_ad(fsp_is_system_temporary(id().space()));
ut_d(oldest_modification_= 0); /* for buf_LRU_block_free_non_file_page() */ /* buf_LRU_block_free_non_file_page() asserts !oldest_modification() */
ut_d(oldest_modification_= 0;)
break; break;
default: default:
ut_ad("block is dirty" == 0); ut_ad("block is dirty" == 0);
...@@ -2258,7 +2267,12 @@ inline void buf_page_t::clear_oldest_modification() ...@@ -2258,7 +2267,12 @@ inline void buf_page_t::clear_oldest_modification()
ut_ad(state == BUF_BLOCK_FILE_PAGE || state == BUF_BLOCK_ZIP_PAGE || ut_ad(state == BUF_BLOCK_FILE_PAGE || state == BUF_BLOCK_ZIP_PAGE ||
state == BUF_BLOCK_REMOVE_HASH); state == BUF_BLOCK_REMOVE_HASH);
ut_ad(oldest_modification()); ut_ad(oldest_modification());
oldest_modification_= 0; ut_ad(!list.prev);
ut_ad(!list.next);
/* We must use release memory order to guarantee that callers of
oldest_modification_acquire() will observe the block as
being detached from buf_pool.flush_list, after reading the value 0. */
oldest_modification_.store(0, std::memory_order_release);
} }
/** Note that a block is no longer dirty, while not removing /** Note that a block is no longer dirty, while not removing
...@@ -2268,8 +2282,19 @@ inline void buf_page_t::clear_oldest_modification(bool temporary) ...@@ -2268,8 +2282,19 @@ inline void buf_page_t::clear_oldest_modification(bool temporary)
mysql_mutex_assert_not_owner(&buf_pool.flush_list_mutex); mysql_mutex_assert_not_owner(&buf_pool.flush_list_mutex);
ut_ad(temporary == fsp_is_system_temporary(id().space())); ut_ad(temporary == fsp_is_system_temporary(id().space()));
ut_ad(io_fix_ == BUF_IO_WRITE); ut_ad(io_fix_ == BUF_IO_WRITE);
ut_ad(temporary ? oldest_modification() == 2 : oldest_modification() > 2); if (temporary)
oldest_modification_= !temporary; {
ut_ad(oldest_modification() == 2);
oldest_modification_= 0;
}
else
{
/* We use release memory order to guarantee that callers of
oldest_modification_acquire() will observe the block as
being detached from buf_pool.flush_list, after reading the value 0. */
ut_ad(oldest_modification() > 2);
oldest_modification_.store(1, std::memory_order_release);
}
} }
/** @return whether the block is modified and ready for flushing */ /** @return whether the block is modified and ready for flushing */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment