Commit 8684af76 authored by Marko Mäkelä

MDEV-28137 Some memory transactions are unnecessarily complex

buf_page_get_zip(): Do not perform a system call inside a
memory transaction. Instead, if the page latch is unavailable,
abort the memory transaction and let the fall-back code path
wait for the page latch.
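
For illustration, a minimal sketch of that pattern, written against the
raw Intel RTM intrinsics rather than the xbegin()/xabort()/xend()
wrappers used in the patch; the names latch and shared_value are made
up, and only the control flow matters: no waiting inside the memory
transaction, all waiting in the fall-back path.

#include <immintrin.h>  /* _xbegin(), _xabort(), _xend(); build with -mrtm */
#include <atomic>

static std::atomic<bool> latch{false};  /* stand-in for the page latch */
static int shared_value;                /* stand-in for the protected data */

int read_shared()
{
  if (_xbegin() == _XBEGIN_STARTED)
  {
    /* Inside the memory transaction: never block. If the latch is held,
       abort and let the fall-back path below wait for it instead. */
    if (latch.load(std::memory_order_relaxed))
      _xabort(0);
    int v= shared_value;
    _xend();
    return v;
  }
  /* Fall-back path: waiting (possibly a futex system call) is fine here. */
  while (latch.exchange(true, std::memory_order_acquire))
    ;
  int v= shared_value;
  latch.store(false, std::memory_order_release);
  return v;
}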

buf_pool_t::watch_remove(): Return the previous state of the block.

buf_page_init_for_read(): Use regular stores for carrying the buffer-fix
count returned by watch_remove() over to the new block descriptor.
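
The new code computes the state passed to set_state() as
watch_remove() + (READ_FIX - UNFIXED). The arithmetic can be checked
with a stand-alone sketch; the constant values below are made up, but
as in buf_page_t the buffer-fix count occupies the low bits of state()
while markers such as UNFIXED and READ_FIX occupy the high bits, so
adding READ_FIX - UNFIXED re-labels the page as read-fixed without
disturbing the fix count.

#include <cassert>
#include <cstdint>

/* Hypothetical encodings, for illustration only. */
constexpr uint32_t UNFIXED= 1U << 29;
constexpr uint32_t READ_FIX= 5U << 29;

int main()
{
  uint32_t fix_count= 3;                         /* watchers of the sentinel */
  uint32_t sentinel_state= UNFIXED + fix_count;  /* what watch_remove() returns */
  uint32_t new_state= sentinel_state + (READ_FIX - UNFIXED);
  assert(new_state == READ_FIX + fix_count);     /* fix count carried over */
  return 0;
}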

A more extensive version of this was reviewed by Daniel Black
and tested with Intel TSX-NI by Axel Schwenke and Matthias Leich.
My assumption that regular loads and stores would execute faster
in a memory transaction than operations like std::atomic::fetch_add()
turned out to be incorrect.
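
Concretely, the two styles being compared look like the following
sketch, where n stands in for a reference count such as the buffer-fix
count; the first form was assumed to be cheaper inside a memory
transaction because the transaction already provides atomicity, but the
measurements did not confirm that.

#include <atomic>
#include <cstdint>

static std::atomic<uint32_t> n{0};

/* Regular load and store; relies on the enclosing memory transaction
   for atomicity. */
void bump_plain()
{
  uint32_t v= n.load(std::memory_order_relaxed);
  n.store(v + 1, std::memory_order_relaxed);
}

/* Atomic read-modify-write (a locked instruction on x86). */
void bump_rmw()
{
  n.fetch_add(1, std::memory_order_relaxed);
}
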
parent 5ccd845d
@@ -2269,29 +2269,66 @@ buf_page_t* buf_page_get_zip(const page_id_t page_id, ulint zip_size)
lookup:
for (bool discard_attempted= false;;)
{
#ifndef NO_ELISION
if (xbegin())
{
transactional_shared_lock_guard<page_hash_latch> g{hash_lock};
if (hash_lock.is_locked())
xabort();
bpage= buf_pool.page_hash.get(page_id, chain);
if (!bpage || buf_pool.watch_is_sentinel(*bpage))
{
xend();
goto must_read_page;
}
if (!bpage->zip.data)
{
/* There is no ROW_FORMAT=COMPRESSED page. */
xend();
return nullptr;
}
if (discard_attempted || !bpage->frame)
{
if (!bpage->lock.s_lock_try())
xabort();
xend();
break;
}
xend();
}
else
#endif
{
hash_lock.lock_shared();
bpage= buf_pool.page_hash.get(page_id, chain);
if (!bpage || buf_pool.watch_is_sentinel(*bpage))
{
hash_lock.unlock_shared();
goto must_read_page;
}
ut_ad(bpage->in_file());
ut_ad(page_id == bpage->id());
if (!bpage->zip.data)
{
/* There is no ROW_FORMAT=COMPRESSED page. */
hash_lock.unlock_shared();
return nullptr;
}
if (discard_attempted || !bpage->frame)
{
/* Even when we are holding a page_hash latch, it should be
/* Even when we are holding a hash_lock, it should be
acceptable to wait for a page S-latch here, because
buf_page_t::read_complete() will not wait for buf_pool.mutex,
and because S-latch would not conflict with a U-latch
that would be protecting buf_page_t::write_complete(). */
bpage->lock.s_lock();
hash_lock.unlock_shared();
break;
}
hash_lock.unlock_shared();
}
discard_attempted= true;
......
/*****************************************************************************
Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2015, 2021, MariaDB Corporation.
Copyright (c) 2015, 2022, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -50,19 +50,30 @@ i/o-fixed buffer blocks */
/** Remove the sentinel block for the watch before replacing it with a
real block. watch_unset() or watch_occurred() will notice
that the block has been replaced with the real block.
@param watch sentinel
@param chain locked hash table chain */
inline void buf_pool_t::watch_remove(buf_page_t *watch,
buf_pool_t::hash_chain &chain)
@param w sentinel
@param chain locked hash table chain
@return w->state() */
inline uint32_t buf_pool_t::watch_remove(buf_page_t *w,
buf_pool_t::hash_chain &chain)
{
mysql_mutex_assert_owner(&buf_pool.mutex);
ut_ad(page_hash.lock_get(chain).is_write_locked());
ut_a(watch_is_sentinel(*watch));
if (watch->buf_fix_count())
page_hash.remove(chain, watch);
ut_ad(!watch->in_page_hash);
watch->set_state(buf_page_t::NOT_USED);
watch->id_= page_id_t(~0ULL);
ut_ad(xtest() || page_hash.lock_get(chain).is_write_locked());
ut_ad(w >= &watch[0]);
ut_ad(w < &watch[array_elements(watch)]);
ut_ad(!w->in_zip_hash);
ut_ad(!w->zip.data);
uint32_t s{w->state()};
w->set_state(buf_page_t::NOT_USED);
ut_ad(s >= buf_page_t::UNFIXED);
ut_ad(s < buf_page_t::READ_FIX);
if (~buf_page_t::LRU_MASK & s)
page_hash.remove(chain, w);
ut_ad(!w->in_page_hash);
w->id_= page_id_t(~0ULL);
return s;
}
/** Initialize a page for read to the buffer buf_pool. If the page is
@@ -139,14 +150,8 @@ static buf_page_t* buf_page_init_for_read(ulint mode, const page_id_t page_id,
{buf_pool.page_hash.lock_get(chain)};
if (hash_page)
{
/* Preserve the reference count. */
uint32_t buf_fix_count= hash_page->state();
ut_a(buf_fix_count >= buf_page_t::UNFIXED);
ut_a(buf_fix_count < buf_page_t::READ_FIX);
buf_pool.watch_remove(hash_page, chain);
block->page.fix(buf_fix_count - buf_page_t::UNFIXED);
}
bpage->set_state(buf_pool.watch_remove(hash_page, chain) +
(buf_page_t::READ_FIX - buf_page_t::UNFIXED));
buf_pool.page_hash.append(chain, &block->page);
}
@@ -209,16 +214,8 @@ static buf_page_t* buf_page_init_for_read(ulint mode, const page_id_t page_id,
{buf_pool.page_hash.lock_get(chain)};
if (hash_page)
{
/* Preserve the reference count. It can be 0 if
buf_pool_t::watch_unset() is executing concurrently,
waiting for buf_pool.mutex, which we are holding. */
uint32_t buf_fix_count= hash_page->state();
ut_a(buf_fix_count >= buf_page_t::UNFIXED);
ut_a(buf_fix_count < buf_page_t::READ_FIX);
bpage->fix(buf_fix_count - buf_page_t::UNFIXED);
buf_pool.watch_remove(hash_page, chain);
}
bpage->set_state(buf_pool.watch_remove(hash_page, chain) +
(buf_page_t::READ_FIX - buf_page_t::UNFIXED));
buf_pool.page_hash.append(chain, bpage);
}
......
/*****************************************************************************
Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2013, 2021, MariaDB Corporation.
Copyright (c) 2013, 2022, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -1535,9 +1535,10 @@ class buf_pool_t
/** Remove the sentinel block for the watch before replacing it with a
real block. watch_unset() or watch_occurred() will notice
that the block has been replaced with the real block.
@param watch sentinel
@param chain locked hash table chain */
inline void watch_remove(buf_page_t *watch, hash_chain &chain);
@param w sentinel
@param chain locked hash table chain
@return w->state() */
inline uint32_t watch_remove(buf_page_t *w, hash_chain &chain);
/** @return whether less than 1/4 of the buffer pool is available */
TPOOL_SUPPRESS_TSAN
......