Commit 064bd780 authored by Sergey Vojtovich's avatar Sergey Vojtovich

MDEV-15059 - Misc small InnoDB scalability fixes

When cloning oldest view, don't copy ReadView::m_creator_trx_id.
It means that the owner thread is now allowed to access this member
without trx_sys.mutex protection.

To achieve this we have to keep ReadView::m_creator_trx_id in
ReadView::m_ids. This is required to not let purge thread process
records owned by transaction associated with oldest view.

It is only required if the transaction entered read-write mode before its
view was created.

If the transaction entered read-write mode after its view was created
(trx_set_rw_mode()), purge thread won't be allowed to touch it because
m_low_limit_id >= m_creator_trx_id holds. Thus we don't have to add
this transaction id to ReadView::m_ids.

Cleanups:

ReadView::ids_t: doesn't seem to make any sense, just complicates matters.

ReadView::copy_trx_ids(): doesn't make sense anymore, integrated into
caller.

ReadView::copy_complete(): not needed anymore.

ReadView copy constructors: don't seem to make any sense.

trx_purge_truncate_history(): removed view argument, access
purge_sys->view directly instead.
parent 04996939
......@@ -39,114 +39,9 @@ class MVCC;
read should not see the modifications to the database. */
class ReadView {
/** This is similar to a std::vector but it is not a drop
in replacement. It is specific to ReadView.

It holds trx_ids_t::value_type elements in a manually managed array and
tracks the number of active elements (m_size) separately from the number
of allocated elements (m_reserved). No "public:" section is visible in
this declaration; access appears to go through the friend class ReadView
declared at the end of the class. */
class ids_t {
typedef trx_ids_t::value_type value_type;
/**
Constructor. Value-initializes every member, so the array starts with a
null pointer, zero elements and zero capacity. */
ids_t() : m_ptr(), m_size(), m_reserved() { }
/**
Destructor. Releases the array with UT_DELETE_ARRAY. */
~ids_t() { UT_DELETE_ARRAY(m_ptr); }
/**
Try and increase the size of the array. Old elements are
copied across. It is a no-op if n is < current size.
@param n Make space for n elements */
void reserve(ulint n);
/**
Resize the array, sets the current element count. No memory is
allocated here: n must not exceed capacity() (checked with ut_ad).
@param n new size of the array, in elements */
void resize(ulint n)
{
ut_ad(n <= capacity());
m_size = n;
}
/**
Reset the size to 0. The capacity is left unchanged. */
void clear() { resize(0); }
/**
@return the capacity of the array in elements */
ulint capacity() const { return(m_reserved); }
/**
Copy and overwrite the current array contents
@param start Source array
@param end Pointer to end of array */
void assign(const value_type* start, const value_type* end);
/**
Insert the value in the correct slot, preserving the order.
Doesn't check for duplicates.
@param value the value to insert */
void insert(value_type value);
/**
The array must not be empty (checked with ut_ad).
@return the value of the first element in the array */
value_type front() const
{
ut_ad(!empty());
return(m_ptr[0]);
}
/**
The array must not be empty (checked with ut_ad).
@return the value of the last element in the array */
value_type back() const
{
ut_ad(!empty());
return(m_ptr[m_size - 1]);
}
/**
Append a value to the array.
@param value the value to append */
void push_back(value_type value);
/**
@return a pointer to the start of the array */
trx_id_t* data() { return(m_ptr); };
/**
@return a const pointer to the start of the array */
const trx_id_t* data() const { return(m_ptr); };
/**
@return the number of elements in the array */
ulint size() const { return(m_size); }
/**
@return true if size() == 0 */
bool empty() const { return(size() == 0); }
private:
// Prevent copying: the copy constructor and copy assignment operator
// are only declared here, in the private section.
ids_t(const ids_t&);
ids_t& operator=(const ids_t&);
private:
/** Memory for the array */
value_type* m_ptr;
/** Number of active elements in the array */
ulint m_size;
/** Size of m_ptr in elements */
ulint m_reserved;
/* ReadView gets access to the private members above. */
friend class ReadView;
};
public:
ReadView() : m_ids(), m_open(false), m_registered(false) {}
ReadView() : m_creator_trx_id(TRX_ID_MAX), m_ids(),
m_registered(false) {}
/** Check whether transaction id is valid.
@param[in] id transaction id to check
@param[in] name table name */
......@@ -179,9 +74,7 @@ class ReadView {
return(true);
}
const ids_t::value_type* p = m_ids.data();
return(!std::binary_search(p, p + m_ids.size(), id));
return(!std::binary_search(m_ids.begin(), m_ids.end(), id));
}
/**
......@@ -196,13 +89,18 @@ class ReadView {
Mark the view as closed */
void close()
{
ut_ad(m_creator_trx_id != TRX_ID_MAX);
m_creator_trx_id = TRX_ID_MAX;
set_open(false);
set_creator_trx_id(TRX_ID_MAX);
}
bool is_open() const
{
return static_cast<trx_id_t>(my_atomic_load64_explicit(
const_cast<int64*>(
reinterpret_cast<const int64*>(
&m_creator_trx_id)),
MY_MEMORY_ORDER_RELAXED)) != TRX_ID_MAX;
}
bool is_open() const { return(m_open); }
void set_open(bool open) { m_open= open; }
bool is_registered() const { return(m_registered); }
void set_registered(bool registered) { m_registered= registered; }
......@@ -242,12 +140,12 @@ class ReadView {
Set the creator transaction id, existing id must be 0.
Note: This should be set only for views created by RW
transactions. Caller must own trx_sys.mutex. */
void creator_trx_id(trx_id_t id)
transactions. */
void set_creator_trx_id(trx_id_t id)
{
ut_ad(id > 0);
ut_ad(m_creator_trx_id == 0);
m_creator_trx_id = id;
my_atomic_store64_explicit(
reinterpret_cast<int64*>(&m_creator_trx_id),
id, MY_MEMORY_ORDER_RELAXED);
}
#ifdef UNIV_DEBUG
......@@ -265,34 +163,18 @@ class ReadView {
}
#endif /* UNIV_DEBUG */
private:
/**
Copy the transaction ids from the source vector */
inline void copy_trx_ids(const trx_ids_t& trx_ids);
/**
Opens a read view where exactly the transactions serialized before this
point in time are seen in the view.
@param id Creator transaction id */
inline void prepare(trx_id_t id);
point in time are seen in the view. */
inline void clone();
/**
Copy state from another view. Must call copy_complete() to finish.
Copy state from another view.
@param other view to copy from */
inline void copy_prepare(const ReadView& other);
/**
Complete the copy, insert the creator transaction id into the
m_trx_ids too and adjust the m_up_limit_id *, if required */
inline void copy_complete();
inline void copy(const ReadView& other);
friend class MVCC;
private:
// Disable copying
ReadView(const ReadView&);
ReadView& operator=(const ReadView&);
private:
/** The read should not see any transaction with trx id >= this
value. In other words, this is the "high water mark". */
trx_id_t m_low_limit_id;
......@@ -308,16 +190,13 @@ class ReadView {
/** Set of RW transactions that was active when this snapshot
was taken */
ids_t m_ids;
trx_ids_t m_ids;
/** The view does not need to see the undo logs for transactions
whose transaction number is strictly smaller (<) than this value:
they can be removed in purge if not needed by other views */
trx_id_t m_low_limit_no;
/** true if view is open. */
bool m_open;
/** true if transaction is in MVCC::m_views. Only thread that owns
this view may access it. */
bool m_registered;
......
......@@ -172,9 +172,6 @@ RW transaction can commit or rollback (or free views). AC-NL-RO transactions
will mark their views as closed but not actually free their views.
*/
/** Minimum number of elements to reserve in ReadView::ids_t */
static const ulint MIN_TRX_IDS = 32;
#ifdef UNIV_DEBUG
/** Functor to validate the view list. */
struct ViewCheck {
......@@ -211,175 +208,15 @@ MVCC::validate() const
#endif /* UNIV_DEBUG */
/**
Grow the backing array so that it can hold at least n elements. The
active elements are carried over into the new array. Nothing happens
when the current capacity already covers n.
@param n Make space for n elements */
void
ReadView::ids_t::reserve(ulint n)
{
	if (capacity() >= n) {
		/* Already large enough. */
		return;
	}

	/* Never allocate fewer than MIN_TRX_IDS elements. */
	const ulint	new_capacity = (n < MIN_TRX_IDS) ? MIN_TRX_IDS : n;

	value_type* const	old_array = m_ptr;

	m_ptr = UT_NEW_ARRAY_NOKEY(value_type, new_capacity);
	m_reserved = new_capacity;

	ut_ad(size() < capacity());

	if (old_array == NULL) {
		/* First allocation: there is nothing to carry over. */
		return;
	}

	::memmove(m_ptr, old_array, size() * sizeof(value_type));
	UT_DELETE_ARRAY(old_array);
}
/**
Copy and overwrite this array contents
@param start Source array
@param end Pointer to end of array */
void
ReadView::ids_t::assign(const value_type* start, const value_type* end)
{
ut_ad(end >= start);
ulint n = end - start;
/* No need to copy the old contents across during reserve(). */
clear();
/* Create extra space if required. */
reserve(n);
resize(n);
ut_ad(size() == n);
::memmove(m_ptr, start, size() * sizeof(value_type));
}
/**
Append a value to the array, growing the array first when it is full.
@param value the value to append */
void
ReadView::ids_t::push_back(value_type value)
{
	if (capacity() <= size()) {
		/* Double the size, but always ask for at least one element.
		For an array that has never been allocated size() * 2 is 0,
		and reserve(0) returns without allocating because
		0 <= capacity(); the store below would then write through a
		null m_ptr. */
		reserve(size() > 0 ? size() * 2 : 1);
	}

	m_ptr[m_size++] = value;

	ut_ad(size() <= capacity());
}
/**
Insert the value in the correct slot, preserving the order. Doesn't
check for duplicates. */
Opens a read view where exactly the transactions serialized before this
point in time are seen in the view. */
void
ReadView::ids_t::insert(value_type value)
{
	ut_ad(value > 0);

	/* Make room for one more element before taking any pointers into
	the array: reserve() may move it. */
	reserve(size() + 1);

	value_type* const	last = data() + size();

	/* Slot that the new value will occupy. Appending is the answer when
	the array is empty or the value is greater than the current last
	element; otherwise search for the first element greater than it. */
	value_type* const	slot = (empty() || back() < value)
		? last
		: std::upper_bound(data(), last, value);

	if (slot == last) {
		push_back(value);
		return;
	}

	ut_ad(slot < last);

	const ulint	n_bytes = std::distance(slot, last)
		* sizeof(value_type);

	/* Shift the tail up by one element. Source and destination
	overlap, hence memmove. */
	::memmove(slot + 1, slot, n_bytes);

	*slot = value;

	resize(size() + 1);
}
/**
Copy the transaction ids from the source vector */
void
ReadView::copy_trx_ids(const trx_ids_t& trx_ids)
void ReadView::clone()
{
ut_ad(mutex_own(&trx_sys.mutex));
ulint size = trx_ids.size();
if (m_creator_trx_id > 0) {
ut_ad(size > 0);
--size;
}
if (size == 0) {
m_ids.clear();
return;
}
m_ids.reserve(size);
m_ids.resize(size);
ids_t::value_type* p = m_ids.data();
/* Copy all the trx_ids except the creator trx id */
if (m_creator_trx_id > 0) {
/* Note: We go through all this trouble because it is
unclear whether std::vector::resize() will cause an
overhead or not. We should test this extensively and
if the vector to vector copy is fast enough then get
rid of this code and replace it with more readable
and obvious code. The code below does exactly one copy,
and filters out the creator's trx id. */
trx_ids_t::const_iterator it = std::lower_bound(
trx_ids.begin(), trx_ids.end(), m_creator_trx_id);
ut_ad(it != trx_ids.end() && *it == m_creator_trx_id);
ulint i = std::distance(trx_ids.begin(), it);
ulint n = i * sizeof(trx_ids_t::value_type);
::memmove(p, &trx_ids[0], n);
n = (trx_ids.size() - i - 1) * sizeof(trx_ids_t::value_type);
ut_ad(i + (n / sizeof(trx_ids_t::value_type)) == m_ids.size());
if (n > 0) {
::memmove(p + i, &trx_ids[i + 1], n);
}
} else {
ulint n = size * sizeof(trx_ids_t::value_type);
::memmove(p, &trx_ids[0], n);
}
m_up_limit_id = m_ids.front();
m_low_limit_no = m_low_limit_id = trx_sys.get_max_trx_id();
m_ids= trx_sys.rw_trx_ids;
#ifdef UNIV_DEBUG
/* Original assertion was here to make sure that rw_trx_ids and
rw_trx_hash are in sync and they hold either ACTIVE or PREPARED
......@@ -396,34 +233,12 @@ ReadView::copy_trx_ids(const trx_ids_t& trx_ids)
when transaction is registered it first gets added into rw_trx_ids
under trx_sys.mutex protection and then to rw_trx_hash without mutex
protection. Thus we need repeat this lookup. */
for (trx_ids_t::const_iterator it = trx_ids.begin();
it != trx_ids.end(); ++it) {
for (trx_ids_t::const_iterator it = trx_sys.rw_trx_ids.begin();
it != trx_sys.rw_trx_ids.end(); ++it) {
while (!trx_sys.rw_trx_hash.find(*it));
}
#endif /* UNIV_DEBUG */
}
/**
Opens a read view where exactly the transactions serialized before this
point in time are seen in the view.
@param id Creator transaction id */
void
ReadView::prepare(trx_id_t id)
{
ut_ad(mutex_own(&trx_sys.mutex));
m_creator_trx_id = id;
m_low_limit_no = m_low_limit_id = m_up_limit_id =
trx_sys.get_max_trx_id();
if (!trx_sys.rw_trx_ids.empty()) {
copy_trx_ids(trx_sys.rw_trx_ids);
} else {
m_ids.clear();
}
m_up_limit_id = m_ids.empty() ? m_low_limit_id : m_ids.front();
ut_ad(m_up_limit_id <= m_low_limit_id);
if (UT_LIST_GET_LEN(trx_sys.serialisation_list) > 0) {
......@@ -472,8 +287,9 @@ void MVCC::view_open(trx_t* trx)
and this thread.
To avoid this race we should've checked trx_sys.get_max_trx_id() and
do trx->read_view.set_open(true) atomically under trx_sys.mutex
protection. But we're cutting edges to achieve great scalability.
do trx->read_view.set_creator_trx_id(trx->id) atomically under
trx_sys.mutex protection. But we're cutting edges to achieve great
scalability.
There're at least two types of concurrent threads interested in this
value: purge coordinator thread (see MVCC::clone_oldest_view()) and
......@@ -493,80 +309,44 @@ void MVCC::view_open(trx_t* trx)
Second, scary things start when there's a read-write transaction starting
concurrently.
Speculative execution may reorder set_open() before get_max_trx_id(). In
this case purge thread has short gap to clone outdated view. Which is
probably not that bad: it just won't be able to purge things that it was
actually allowed to purge for a short while.
Speculative execution may reorder set_creator_trx_id() before
get_max_trx_id(). In this case purge thread has short gap to clone
outdated view. Which is probably not that bad: it just won't be able to
purge things that it was actually allowed to purge for a short while.
This thread may as well get suspended after trx_sys.get_max_trx_id() and
before trx->read_view.set_open(true). New read-write transaction may get
started, committed and purged meanwhile. It is acceptable as well, since
this view doesn't see it.
before trx->read_view.set_creator_trx_id(trx->id). New read-write
transaction may get started, committed and purged meanwhile. It is
acceptable as well, since this view doesn't see it.
*/
trx->read_view.set_open(true);
trx->read_view.set_creator_trx_id(trx->id);
return;
}
mutex_enter(&trx_sys.mutex);
trx->read_view.prepare(trx->id);
trx->read_view.clone();
if (trx->read_view.is_registered())
UT_LIST_REMOVE(m_views, &trx->read_view);
else
trx->read_view.set_registered(true);
trx->read_view.set_open(true);
trx->read_view.set_creator_trx_id(trx->id);
UT_LIST_ADD_FIRST(m_views, &trx->read_view);
ut_ad(validate());
mutex_exit(&trx_sys.mutex);
}
/**
Copy state from another view. Must call copy_complete() to finish.
Copy state from another view.
@param other view to copy from */
void
ReadView::copy_prepare(const ReadView& other)
ReadView::copy(const ReadView& other)
{
ut_ad(&other != this);
if (!other.m_ids.empty()) {
const ids_t::value_type* p = other.m_ids.data();
m_ids.assign(p, p + other.m_ids.size());
} else {
m_ids.clear();
}
m_ids= other.m_ids;
m_up_limit_id = other.m_up_limit_id;
m_low_limit_no = other.m_low_limit_no;
m_low_limit_id = other.m_low_limit_id;
m_creator_trx_id = other.m_creator_trx_id;
}
/**
Finish copying a view: add the creator transaction id (if there is one)
to m_ids, lower m_up_limit_id when m_ids starts with a smaller id, and
reset m_creator_trx_id to 0. */
void
ReadView::copy_complete()
{
	ut_ad(!mutex_own(&trx_sys.mutex));

	const bool	has_creator = m_creator_trx_id > 0;

	if (has_creator) {
		m_ids.insert(m_creator_trx_id);
	}

	if (m_ids.empty()) {
		/* Nothing can lower m_up_limit_id. */
	} else {
		/* ids_t::insert() keeps the array ordered, so front() is
		compared against the current limit. */
		m_up_limit_id = std::min(m_ids.front(), m_up_limit_id);
	}

	ut_ad(m_up_limit_id <= m_low_limit_id);

	/* The creator transaction id now lives in m_ids. */
	m_creator_trx_id = 0;
}
/** Clones the oldest view and stores it in view. No need to
......@@ -586,14 +366,13 @@ MVCC::clone_oldest_view(ReadView* view)
{
if (oldest_view->is_open())
{
view->copy_prepare(*oldest_view);
view->copy(*oldest_view);
mutex_exit(&trx_sys.mutex);
view->copy_complete();
return;
}
}
/* No views in the list: snapshot current state. */
view->prepare(0);
view->clone();
mutex_exit(&trx_sys.mutex);
}
......
......@@ -225,7 +225,6 @@ purge_sys_t::~purge_sys_t()
ut_a(sess->trx->id == 0);
sess->trx->state = TRX_STATE_NOT_STARTED;
sess_close(sess);
view.close();
rw_lock_free(&latch);
/* rw_lock_free() already called latch.~rw_lock_t(); tame the
debug assertions when the destructor will be called once more. */
......@@ -1063,23 +1062,21 @@ trx_purge_initiate_truncate(
DBUG_SUICIDE(););
}
/********************************************************************//**
/**
Removes unnecessary history data from rollback segments. NOTE that when this
function is called, the caller must not have any latches on undo log pages! */
static
void
trx_purge_truncate_history(
/*========================*/
purge_iter_t* limit, /*!< in: truncate limit */
const ReadView* view) /*!< in: purge view */
function is called, the caller must not have any latches on undo log pages!
@param[in] limit truncate limit
*/
static void trx_purge_truncate_history(purge_iter_t *limit)
{
ut_ad(trx_purge_check_limit());
/* We play safe and set the truncate limit at most to the purge view
low_limit number, though this is not necessary */
if (limit->trx_no >= view->low_limit_no()) {
limit->trx_no = view->low_limit_no();
if (limit->trx_no >= purge_sys->view.low_limit_no()) {
limit->trx_no = purge_sys->view.low_limit_no();
limit->undo_no = 0;
limit->undo_rseg_space = ULINT_UNDEFINED;
}
......@@ -1668,8 +1665,7 @@ trx_purge(
trx_purge_truncate_history(
purge_sys->limit.trx_no
? &purge_sys->limit
: &purge_sys->iter,
&purge_sys->view);
: &purge_sys->iter);
}
MONITOR_INC_VALUE(MONITOR_PURGE_INVOKED, 1);
......
......@@ -1578,7 +1578,7 @@ trx_commit_in_memory(
written */
{
trx->must_flush_log_later = false;
trx->read_view.set_open(false);
trx->read_view.close();
if (trx_is_autocommit_non_locking(trx)) {
......@@ -2727,13 +2727,13 @@ trx_set_rw_mode(
mutex_enter(&trx_sys.mutex);
trx->id = trx_sys.get_new_trx_id();
trx_sys.rw_trx_ids.push_back(trx->id);
mutex_exit(&trx_sys.mutex);
trx_sys.rw_trx_hash.insert(trx);
/* So that we can see our own changes. */
if (trx->read_view.is_open()) {
trx->read_view.creator_trx_id(trx->id);
trx->read_view.set_creator_trx_id(trx->id);
}
mutex_exit(&trx_sys.mutex);
trx_sys.rw_trx_hash.insert(trx);
}
/**
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment