Commit 0e60f89c authored by sensssz's avatar sensssz

Change VATS implementation.

parent 03b3425d
......@@ -83,6 +83,17 @@ lock_rec_has_to_wait_in_queue(
/*==========================*/
const lock_t* wait_lock); /*!< in: waiting record lock */
/*************************************************************//**
Grants a lock to a waiting lock request and releases the waiting transaction.
The caller must hold lock_sys->mutex. */
static
void
lock_grant(
/*=======*/
lock_t* lock, /*!< in/out: waiting lock request */
bool owns_trx_mutex); /*!< in: whether lock->trx->mutex is owned */
/* Buffer to collect THDs to report waits for. */
struct thd_wait_reports {
struct thd_wait_reports *next; /*!< List link */
......@@ -1724,6 +1735,149 @@ RecLock::lock_alloc(
return(lock);
}
/*********************************************************************//**
Check if lock1 has higher priority than lock2.
The rules are applied in this order:
1. NULL has lowest priority: a NULL lock1 never wins, and a non-NULL lock1
always wins against a NULL lock2.
2. If thd_deadlock_victim_preference() reports -1 (lock1's transaction is
the preferred victim) lock1 loses; if it reports 1, lock1 wins.
3. If lock1 belongs to a high priority transaction it wins; otherwise, if
lock2 does, lock1 loses.
4. If lock1 is not a wait lock it wins; otherwise, if lock2 is not a wait
lock, lock1 loses.
5. Otherwise the lock whose transaction has the smaller start_time wins.
@returns true if lock1 has higher priority, false otherwise. */
bool
has_higher_priority(
	lock_t	*lock1,
	lock_t	*lock2)
{
	if (lock1 == NULL) {
		return false;
	}
	if (lock2 == NULL) {
		return true;
	}

	/* Ask the upper server layer whether one of the two transactions
	should be preferred as a deadlock victim. */
	const int	victim = thd_deadlock_victim_preference(
		lock1->trx->mysql_thd, lock2->trx->mysql_thd);

	switch (victim) {
	case -1:
		/* lock1 is the preferred victim, so lock2 ranks higher. */
		return false;
	case 1:
		/* lock2 is the preferred victim, so lock1 ranks higher. */
		return true;
	default:
		/* No preference: fall through to the remaining rules. */
		break;
	}

	/* High priority transactions come next; lock1 is examined first. */
	if (trx_is_high_priority(lock1->trx)) {
		return true;
	}
	if (trx_is_high_priority(lock2->trx)) {
		return false;
	}

	/* Finally compare by wait mode, then by transaction age. */
	if (!lock_get_wait(lock1)) {
		return true;
	}

	return lock_get_wait(lock2)
		&& lock1->trx->start_time < lock2->trx->start_time;
}
/*********************************************************************//**
Insert a record lock into its hash chain according to its mode (whether it
is a wait lock) and the priority reported by has_higher_priority().
The lock becomes the head of the chain when the chain is empty, when the
lock is not a wait lock, or when it outranks the current head. Otherwise it
is linked in front of the first chain entry that does not outrank it. */
static
void
lock_rec_insert_by_trx_age(
	lock_t	*in_lock)	/*!< in: lock to be inserted */
{
	const ulint	fold = lock_rec_fold(
		in_lock->un_member.rec_lock.space,
		in_lock->un_member.rec_lock.page_no);
	hash_table_t*	table = lock_hash_get(in_lock->type_mode);
	hash_cell_t*	bucket = hash_get_nth_cell(
		table, hash_calc_hash(fold, table));
	lock_t*		cursor = (lock_t *) bucket->node;

	/* Head insertion: empty chain, non-wait lock, or in_lock outranks
	the current head. */
	if (cursor == NULL
	    || !lock_get_wait(in_lock)
	    || has_higher_priority(in_lock, cursor)) {
		in_lock->hash = cursor;
		bucket->node = in_lock;
		return;
	}

	/* Walk forward while the successor still outranks in_lock. A NULL
	successor never outranks anything, so the walk stops at the tail at
	the latest and cursor is never NULL here. */
	for (;;) {
		lock_t*	successor = (lock_t *) cursor->hash;

		if (!has_higher_priority(successor, in_lock)) {
			in_lock->hash = successor;
			cursor->hash = in_lock;
			return;
		}

		cursor = successor;
	}
}
static
bool
lock_queue_validate(
const lock_t *in_lock) /*!< in: lock whose hash list is to be validated */
{
ulint space;
ulint page_no;
ulint rec_fold;
hash_table_t* hash;
hash_cell_t* cell;
lock_t* next;
bool wait_lock = false;
if (in_lock == NULL) {
return true;
}
space = in_lock->un_member.rec_lock.space;
page_no = in_lock->un_member.rec_lock.page_no;
rec_fold = lock_rec_fold(space, page_no);
hash = lock_hash_get(in_lock->type_mode);
cell = hash_get_nth_cell(hash,
hash_calc_hash(rec_fold, hash));
next = (lock_t *) cell->node;
while (next != NULL) {
// If this is a granted lock, check that there's no wait lock before it.
if (!lock_get_wait(next)) {
ut_ad(!wait_lock);
} else {
wait_lock = true;
}
next = next->hash;
}
return true;
}
/*********************************************************************//**
Make in_lock the first entry of the hash chain selected by rec_fold.
Does nothing when in_lock is NULL or already is the head of that chain.
The lock is only linked in; it is not unlinked from any previous position. */
static
void
lock_rec_insert_to_head(
	lock_t	*in_lock,	/*!< in: lock to be inserted */
	ulint	rec_fold)	/*!< in: rec_fold of the page */
{
	if (in_lock == NULL) {
		return;
	}

	hash_table_t*	table = lock_hash_get(in_lock->type_mode);
	hash_cell_t*	bucket = hash_get_nth_cell(
		table, hash_calc_hash(rec_fold, table));
	lock_t*		old_head = (lock_t *) bucket->node;

	if (old_head == in_lock) {
		/* Already in front. */
		return;
	}

	bucket->node = in_lock;
	in_lock->hash = old_head;
}
/**
Add the lock to the record lock hash and the transaction's lock list
@param[in,out] lock Newly created record lock to add to the rec hash
......@@ -1733,16 +1887,28 @@ RecLock::lock_add(lock_t* lock, bool add_to_hash)
{
ut_ad(lock_mutex_own());
ut_ad(trx_mutex_own(lock->trx));
bool wait_lock = m_mode & LOCK_WAIT;
if (add_to_hash) {
ulint key = m_rec_id.fold();
hash_table_t *lock_hash = lock_hash_get(m_mode);
++lock->index->table->n_rec_locks;
HASH_INSERT(lock_t, hash, lock_hash_get(m_mode), key, lock);
if (innodb_lock_schedule_algorithm == INNODB_LOCK_SCHEDULE_ALGORITHM_VATS
&& !is_slave_replication) {
if (wait_lock) {
HASH_INSERT(lock_t, hash, lock_hash, key, lock);
} else {
lock_rec_insert_to_head(lock, m_rec_id.fold());
}
} else {
HASH_INSERT(lock_t, hash, lock_hash, key, lock);
}
}
if (m_mode & LOCK_WAIT) {
if (wait_lock) {
lock_set_lock_and_trx_wait(lock, lock->trx);
}
......@@ -1892,85 +2058,6 @@ RecLock::create(
return(lock);
}
/*********************************************************************//**
Check if lock1 has higher priority than lock2.
The rules are applied in this order:
1. NULL has lowest priority: a NULL lock1 never wins, and a non-NULL lock1
always wins against a NULL lock2.
2. If thd_deadlock_victim_preference() reports -1 (lock1's transaction is
the preferred victim) lock1 loses; if it reports 1, lock1 wins.
3. If lock1 is not a wait lock it wins; otherwise, if lock2 is not a wait
lock, lock1 loses.
4. Otherwise the lock whose transaction has the smaller start_time wins.
@returns true if lock1 has higher priority, false otherwise. */
bool
has_higher_priority(
	lock_t	*lock1,
	lock_t	*lock2)
{
	if (lock1 == NULL) {
		return false;
	}
	if (lock2 == NULL) {
		return true;
	}

	/* Ask the upper server layer whether one of the two transactions
	should be preferred as a deadlock victim. */
	const int	victim = thd_deadlock_victim_preference(
		lock1->trx->mysql_thd, lock2->trx->mysql_thd);

	if (victim == -1) {
		/* lock1 is the preferred victim, so lock2 ranks higher. */
		return false;
	}
	if (victim == 1) {
		/* lock2 is the preferred victim, so lock1 ranks higher. */
		return true;
	}

	/* No preference: compare by wait mode, then by transaction age. */
	if (!lock_get_wait(lock1)) {
		return true;
	}

	return lock_get_wait(lock2)
		&& lock1->trx->start_time < lock2->trx->start_time;
}
/*********************************************************************//**
Insert a record lock into its hash chain according to the wait flag passed
by the caller and the priority reported by has_higher_priority().
The lock becomes the head of the chain when the chain is empty, when wait
is false, or when the lock outranks the current head. Otherwise it is
linked in front of the first chain entry that does not outrank it. */
static
void
lock_rec_insert_by_trx_age(
	lock_t	*in_lock,	/*!< in: lock to be inserted */
	bool	wait)		/*!< in: whether it's a wait lock */
{
	const ulint	fold = lock_rec_fold(
		in_lock->un_member.rec_lock.space,
		in_lock->un_member.rec_lock.page_no);
	hash_table_t*	table = lock_hash_get(in_lock->type_mode);
	hash_cell_t*	bucket = hash_get_nth_cell(
		table, hash_calc_hash(fold, table));
	lock_t*		cursor = (lock_t *) bucket->node;

	/* Head insertion: empty chain, non-wait request, or in_lock
	outranks the current head. */
	if (cursor == NULL || !wait || has_higher_priority(in_lock, cursor)) {
		in_lock->hash = cursor;
		bucket->node = in_lock;
		return;
	}

	/* Walk forward while the successor still outranks in_lock. A NULL
	successor never outranks anything, so cursor is never NULL here. */
	for (;;) {
		lock_t*	successor = (lock_t *) cursor->hash;

		if (!has_higher_priority(successor, in_lock)) {
			in_lock->hash = successor;
			cursor->hash = in_lock;
			return;
		}

		cursor = successor;
	}
}
/**
Check the outcome of the deadlock check
@param[in,out] victim_trx Transaction selected for rollback
......@@ -2176,6 +2263,23 @@ RecLock::add_to_waitq(const lock_t* wait_for, const lock_prdt_t* prdt)
dberr_t err = deadlock_check(lock);
ut_ad(trx_mutex_own(m_trx));
// Move it only when it does not cause a deadlock.
if (err != DB_DEADLOCK
&& innodb_lock_schedule_algorithm
== INNODB_LOCK_SCHEDULE_ALGORITHM_VATS
&& !is_slave_replication
&& !trx_is_high_priority(lock->trx)) {
HASH_DELETE(lock_t, hash, lock_hash_get(lock->type_mode),
m_rec_id.fold(), lock);
lock_rec_insert_by_trx_age(lock);
if (lock_get_wait(lock) && !lock_rec_has_to_wait_in_queue(lock)) {
lock_grant(lock, true);
return DB_SUCCESS_LOCKED_REC;
}
}
return(err);
}
......@@ -2590,13 +2694,16 @@ static
void
lock_grant(
/*=======*/
lock_t* lock) /*!< in/out: waiting lock request */
lock_t* lock, /*!< in/out: waiting lock request */
bool owns_trx_mutex) /*!< in: whether lock->trx->mutex is owned */
{
ut_ad(lock_mutex_own());
lock_reset_lock_and_trx_wait(lock);
trx_mutex_enter(lock->trx);
if (!owns_trx_mutex) {
trx_mutex_enter(lock->trx);
}
if (lock_get_mode(lock) == LOCK_AUTO_INC) {
dict_table_t* table = lock->un_member.tab_lock.table;
......@@ -2629,7 +2736,9 @@ lock_grant(
}
}
trx_mutex_exit(lock->trx);
if (!owns_trx_mutex) {
trx_mutex_exit(lock->trx);
}
}
/**
......@@ -2893,30 +3002,64 @@ lock_rec_cancel(
trx_mutex_exit(lock->trx);
}
/*************************************************************//**
Move the lock to the head of the hash list. */
static
void
lock_rec_move_to_front(
lock_t *lock_to_move, /*!< in: lock to be moved */
ulint rec_fold) /*!< in: rec fold of the lock */
{
hash_table_t* lock_hash;
hash_cell_t* cell;
lock_t* next;
lock_grant_and_move_on_page(
hash_table_t *lock_hash,
ulint space,
ulint page_no)
{
lock_t* lock;
lock_t* previous;
ulint rec_fold = lock_rec_fold(space, page_no);
if (lock_to_move != NULL)
{
lock_hash = lock_hash_get(lock_to_move->type_mode);
// Move the target lock to the head of the list
cell = hash_get_nth_cell(lock_hash,
hash_calc_hash(rec_fold, lock_hash));
if (lock_to_move != cell->node) {
next = (lock_t *) cell->node;
cell->node = lock_to_move;
lock_to_move->hash = next;
}
}
previous = (lock_t *) hash_get_nth_cell(lock_hash,
hash_calc_hash(rec_fold, lock_hash))->node;
/* Grant locks if there are no conflicting locks ahead.
Move granted locks to the head of the list. */
for (lock = lock_rec_get_first_on_page_addr(lock_hash, space,
page_no);
lock != NULL;) {
/* If the lock is a wait lock on this page, and it does not need to wait. */
if ((lock->un_member.rec_lock.space == space)
&& (lock->un_member.rec_lock.page_no == page_no)
&& lock_get_wait(lock)
&& !lock_rec_has_to_wait_in_queue(lock)) {
/* Grant the lock */
ut_ad(lock->trx != in_lock->trx);
bool exit_trx_mutex = false;
if (lock->trx->abort_type != TRX_SERVER_ABORT) {
ut_ad(trx_mutex_own(lock->trx));
trx_mutex_exit(lock->trx);
exit_trx_mutex = true;
}
lock_grant(lock, false);
if (exit_trx_mutex) {
ut_ad(!trx_mutex_own(lock->trx));
trx_mutex_enter(lock->trx);
}
if (previous != NULL) {
/* Move the lock to the head of the list. */
HASH_GET_NEXT(hash, previous) = HASH_GET_NEXT(hash, lock);
lock_rec_insert_to_head(lock, rec_fold);
} else {
/* Already at the head of the list. */
previous = lock;
}
/* Move on to the next lock. */
lock = static_cast<lock_t *>(HASH_GET_NEXT(hash, previous));
} else {
previous = lock;
lock = static_cast<lock_t *>(HASH_GET_NEXT(hash, lock));
}
}
}
/*************************************************************//**
......@@ -2935,8 +3078,6 @@ lock_rec_dequeue_from_page(
{
ulint space;
ulint page_no;
ulint rec_fold;
lock_t* previous = NULL;
lock_t* lock;
trx_lock_t* trx_lock;
hash_table_t* lock_hash;
......@@ -2949,7 +3090,6 @@ lock_rec_dequeue_from_page(
space = in_lock->un_member.rec_lock.space;
page_no = in_lock->un_member.rec_lock.page_no;
rec_fold = lock_rec_fold(space, page_no);
ut_ad(in_lock->index->table->n_rec_locks > 0);
in_lock->index->table->n_rec_locks--;
......@@ -2965,7 +3105,7 @@ lock_rec_dequeue_from_page(
MONITOR_DEC(MONITOR_NUM_RECLOCK);
if (innodb_lock_schedule_algorithm
== INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS) {
== INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS || is_slave_replication) {
/* Check if waiting locks in the queue can now be granted:
grant locks if there are no conflicting locks ahead. Stop at
......@@ -2990,7 +3130,7 @@ lock_rec_dequeue_from_page(
exit_trx_mutex = true;
}
lock_grant(lock);
lock_grant(lock, false);
if (exit_trx_mutex) {
ut_ad(!trx_mutex_own(lock->trx));
......@@ -2999,50 +3139,7 @@ lock_rec_dequeue_from_page(
}
}
} else {
/* Grant locks if there are no conflicting locks ahead.
Move granted locks to the head of the list. */
for (lock = lock_rec_get_first_on_page_addr(lock_hash, space, page_no);
lock != NULL;) {
/* If the lock is a wait lock on this page, and it does not need to wait. */
if ((lock->un_member.rec_lock.space == space)
&& (lock->un_member.rec_lock.page_no == page_no)
&& lock_get_wait(lock)
&& !lock_rec_has_to_wait_in_queue(lock)) {
/* Grant the lock */
ut_ad(lock->trx != in_lock->trx);
bool exit_trx_mutex = false;
if (lock->trx->abort_type != TRX_SERVER_ABORT) {
ut_ad(trx_mutex_own(lock->trx));
trx_mutex_exit(lock->trx);
exit_trx_mutex = true;
}
lock_grant(lock);
if (exit_trx_mutex) {
ut_ad(!trx_mutex_own(lock->trx));
trx_mutex_enter(lock->trx);
}
if (previous != NULL) {
/* Move the lock to the head of the list. */
HASH_GET_NEXT(hash, previous) = HASH_GET_NEXT(hash, lock);
lock_rec_move_to_front(lock, rec_fold);
} else {
/* Already at the head of the list. */
previous = lock;
}
/* Move on to the next lock. */
lock = static_cast<lock_t *>(HASH_GET_NEXT(hash, previous));
} else {
previous = lock;
lock = static_cast<lock_t *>(HASH_GET_NEXT(hash, lock));
}
}
lock_grant_and_move_on_page(lock_hash, space, page_no);
}
}
......@@ -4793,7 +4890,7 @@ lock_table_dequeue(
/* Grant the lock */
ut_ad(in_lock->trx != lock->trx);
lock_grant(lock);
lock_grant(lock, false);
}
}
}
......@@ -4877,6 +4974,54 @@ lock_table_for_trx(
}
/*=========================== LOCK RELEASE ==============================*/
/*************************************************************//**
Grants the waiting locks on record heap_no that no longer have to wait and
moves every newly granted lock to the head of its hash chain.
The scan starts at first_lock and follows the raw hash chain; entries for
other pages or other records are skipped by the filter inside the loop.
first_lock must be linked in the chain selected by its (space, page_no);
if it is not found there, nothing is granted. */
static
void
lock_grant_and_move_on_rec(
	hash_table_t	*lock_hash,	/*!< in: hash table holding the locks */
	lock_t		*first_lock,	/*!< in: lock to start the scan from */
	ulint		heap_no)	/*!< in: heap number of the record */
{
	lock_t*		lock;
	lock_t*		previous;
	const ulint	space = first_lock->un_member.rec_lock.space;
	const ulint	page_no = first_lock->un_member.rec_lock.page_no;
	const ulint	rec_fold = lock_rec_fold(space, page_no);

	previous = static_cast<lock_t *>(
		hash_get_nth_cell(lock_hash,
				  hash_calc_hash(rec_fold, lock_hash))->node);

	if (previous == NULL) {
		/* Empty chain: nothing to grant. */
		return;
	}

	/* `previous' has to be the real chain predecessor of the lock under
	examination. Starting with the chain head while first_lock sits
	further down would make the unlink below drop every lock between
	the head and first_lock. */
	if (previous == first_lock) {
		lock = previous;
	} else {
		while (HASH_GET_NEXT(hash, previous) != NULL
		       && HASH_GET_NEXT(hash, previous) != first_lock) {
			previous = static_cast<lock_t *>(
				HASH_GET_NEXT(hash, previous));
		}

		lock = static_cast<lock_t *>(HASH_GET_NEXT(hash, previous));
	}

	/* Grant locks if there are no conflicting locks ahead.
	Move granted locks to the head of the list. */
	while (lock != NULL) {
		/* The lock is a wait lock on this record and it does not
		need to wait any more. */
		if (lock->un_member.rec_lock.space == space
		    && lock->un_member.rec_lock.page_no == page_no
		    && lock_rec_get_nth_bit(lock, heap_no)
		    && lock_get_wait(lock)
		    && !lock_rec_has_to_wait_in_queue(lock)) {

			lock_grant(lock, false);

			/* Unlink the lock and put it in front. When the lock
			already is the chain head, previous == lock and both
			steps leave the chain unchanged. */
			HASH_GET_NEXT(hash, previous) = HASH_GET_NEXT(hash, lock);
			lock_rec_insert_to_head(lock, rec_fold);

			/* Move on to the lock that followed the granted one;
			`previous' still precedes it. */
			lock = static_cast<lock_t *>(
				HASH_GET_NEXT(hash, previous));
		} else {
			previous = lock;
			lock = static_cast<lock_t *>(HASH_GET_NEXT(hash, lock));
		}
	}
}
/*************************************************************//**
Removes a granted record lock of a transaction from the queue and grants
......@@ -4937,19 +5082,25 @@ lock_rec_unlock(
released:
ut_a(!lock_get_wait(lock));
lock_rec_reset_nth_bit(lock, heap_no);
if (innodb_lock_schedule_algorithm
== INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS || is_slave_replication) {
/* Check if we can now grant waiting lock requests */
/* Check if we can now grant waiting lock requests */
for (lock = first_lock; lock != NULL;
lock = lock_rec_get_next(heap_no, lock)) {
if (lock_get_wait(lock)
&& !lock_rec_has_to_wait_in_queue(lock)) {
for (lock = first_lock; lock != NULL;
lock = lock_rec_get_next(heap_no, lock)) {
if (lock_get_wait(lock)
&& !lock_rec_has_to_wait_in_queue(lock)) {
/* Grant the lock */
ut_ad(trx != lock->trx);
lock_grant(lock);
}
}
/* Grant the lock */
ut_ad(trx != lock->trx);
lock_grant(lock, false);
}
}
} else {
lock_grant_and_move_on_rec(lock_sys->rec_hash, first_lock, heap_no);
}
lock_mutex_exit();
trx_mutex_exit(trx);
......@@ -6182,6 +6333,9 @@ lock_rec_queue_validate(
ut_a(lock_rec_has_to_wait_in_queue(lock));
}
}
ut_ad(innodb_lock_schedule_algorithm == INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS ||
lock_queue_validate(lock));
func_exit:
if (!locked_lock_trx_sys) {
......@@ -7974,7 +8128,8 @@ DeadlockChecker::get_first_lock(ulint* heap_no) const
ut_a(lock != NULL);
ut_a(lock != m_wait_lock ||
(innodb_lock_schedule_algorithm
== INNODB_LOCK_SCHEDULE_ALGORITHM_VATS));
== INNODB_LOCK_SCHEDULE_ALGORITHM_VATS
&& !is_slave_replication));
/* Check that the lock type doesn't change. */
ut_ad(lock_get_type_low(lock) == lock_get_type_low(m_wait_lock));
......
......@@ -378,6 +378,25 @@ struct lock_stack_t {
const lock_t* lock; /*!< Current lock */
const lock_t* wait_lock; /*!< Waiting for lock */
ulint heap_no; /*!< heap number if rec lock */
/*********************************************************************//**
Checks if a waiting record lock request still has to wait in a queue.
@return lock that is causing the wait */
static
const lock_t*
lock_rec_has_to_wait_in_queue(
/*==========================*/
const lock_t* wait_lock); /*!< in: waiting record lock */
/*************************************************************//**
Grants a lock to a waiting lock request and releases the waiting transaction.
The caller must hold lock_sys->mutex. */
static
void
lock_grant(
/*=======*/
lock_t* lock, /*!< in/out: waiting lock request */
bool owns_trx_mutex); /*!< in: whether lock->trx->mutex is owned */
};
extern "C" void thd_rpl_deadlock_check(MYSQL_THD thd, MYSQL_THD other_thd);
......@@ -2010,6 +2029,138 @@ wsrep_print_wait_locks(
}
#endif /* WITH_WSREP */
/*********************************************************************//**
Check if lock1 has higher priority than lock2.
The rules are applied in this order:
1. NULL has lowest priority: a NULL lock1 never wins, and a non-NULL lock1
always wins against a NULL lock2.
2. If thd_deadlock_victim_preference() reports -1 (lock1's transaction is
the preferred victim) lock1 loses; if it reports 1, lock1 wins.
3. If lock1 is not a wait lock it wins; otherwise, if lock2 is not a wait
lock, lock1 loses.
4. Otherwise the lock whose transaction has the smaller start_time wins.
@returns true if lock1 has higher priority, false otherwise. */
bool
has_higher_priority(
	lock_t	*lock1,
	lock_t	*lock2)
{
	if (lock1 == NULL) {
		return false;
	}
	if (lock2 == NULL) {
		return true;
	}

	/* Ask the upper server layer whether one of the two transactions
	should be preferred as a deadlock victim. */
	const int	victim = thd_deadlock_victim_preference(
		lock1->trx->mysql_thd, lock2->trx->mysql_thd);

	switch (victim) {
	case -1:
		/* lock1 is the preferred victim, so lock2 ranks higher. */
		return false;
	case 1:
		/* lock2 is the preferred victim, so lock1 ranks higher. */
		return true;
	default:
		break;
	}

	/* No preference: compare by wait mode, then by transaction age. */
	if (!lock_get_wait(lock1)) {
		return true;
	}

	return lock_get_wait(lock2)
		&& lock1->trx->start_time < lock2->trx->start_time;
}
/*********************************************************************//**
Insert a record lock into its chain of lock_sys->rec_hash according to its
mode (whether it is a wait lock) and the priority reported by
has_higher_priority().
The lock becomes the head of the chain when the chain is empty, when the
lock is not a wait lock, or when it outranks the current head. Otherwise it
is linked in front of the first chain entry that does not outrank it. */
static
void
lock_rec_insert_by_trx_age(
	lock_t	*in_lock)	/*!< in: lock to be inserted */
{
	const ulint	fold = lock_rec_fold(
		in_lock->un_member.rec_lock.space,
		in_lock->un_member.rec_lock.page_no);
	hash_table_t*	table = lock_sys->rec_hash;
	hash_cell_t*	bucket = hash_get_nth_cell(
		table, hash_calc_hash(fold, table));
	lock_t*		cursor = (lock_t *) bucket->node;

	/* Head insertion: empty chain, non-wait lock, or in_lock outranks
	the current head. */
	if (cursor == NULL
	    || !lock_get_wait(in_lock)
	    || has_higher_priority(in_lock, cursor)) {
		in_lock->hash = cursor;
		bucket->node = in_lock;
		return;
	}

	/* Walk forward while the successor still outranks in_lock. A NULL
	successor never outranks anything, so cursor is never NULL here. */
	for (;;) {
		lock_t*	successor = (lock_t *) cursor->hash;

		if (!has_higher_priority(successor, in_lock)) {
			in_lock->hash = successor;
			cursor->hash = in_lock;
			return;
		}

		cursor = successor;
	}
}
static
bool
lock_queue_validate(
const lock_t *in_lock) /*!< in: lock whose hash list is to be validated */
{
ulint space;
ulint page_no;
ulint rec_fold;
hash_cell_t* cell;
lock_t* next;
bool wait_lock = false;
if (in_lock == NULL) {
return true;
}
space = in_lock->un_member.rec_lock.space;
page_no = in_lock->un_member.rec_lock.page_no;
rec_fold = lock_rec_fold(space, page_no);
cell = hash_get_nth_cell(lock_sys->rec_hash,
hash_calc_hash(rec_fold, lock_sys->rec_hash));
next = (lock_t *) cell->node;
while (next != NULL) {
// If this is a granted lock, check that there's no wait lock before it.
if (!lock_get_wait(next)) {
ut_ad(!wait_lock);
} else {
wait_lock = true;
}
next = next->hash;
}
return true;
}
/*********************************************************************//**
Make in_lock the first entry of the lock_sys->rec_hash chain selected by
rec_fold. Does nothing when in_lock is NULL or already is the head of that
chain. The lock is only linked in; it is not unlinked from any previous
position. */
static
void
lock_rec_insert_to_head(
	lock_t	*in_lock,	/*!< in: lock to be inserted */
	ulint	rec_fold)	/*!< in: rec_fold of the page */
{
	if (in_lock == NULL) {
		return;
	}

	hash_cell_t*	bucket = hash_get_nth_cell(
		lock_sys->rec_hash,
		hash_calc_hash(rec_fold, lock_sys->rec_hash));
	lock_t*		old_head = (lock_t *) bucket->node;

	if (old_head == in_lock) {
		/* Already in front. */
		return;
	}

	bucket->node = in_lock;
	in_lock->hash = old_head;
}
/*********************************************************************//**
Creates a new record lock and inserts it to the lock queue. Does NOT check
for deadlocks or lock compatibility!
......@@ -2037,6 +2188,7 @@ lock_rec_create(
lock_t* lock;
ulint page_no;
ulint space;
ulint rec_fold;
ulint n_bits;
ulint n_bytes;
const page_t* page;
......@@ -2051,6 +2203,7 @@ lock_rec_create(
space = buf_block_get_space(block);
page_no = buf_block_get_page_no(block);
rec_fold = lock_rec_fold(space, page_no);
page = block->frame;
btr_assert_not_corrupted(block, index);
......@@ -2173,12 +2326,28 @@ lock_rec_create(
}
trx_mutex_exit(c_lock->trx);
} else {
HASH_INSERT(lock_t, hash, lock_sys->rec_hash,
lock_rec_fold(space, page_no), lock);
if (innodb_lock_schedule_algorithm == INNODB_LOCK_SCHEDULE_ALGORITHM_VATS
&& !is_slave_replication) {
if (type_mode & LOCK_WAIT) {
HASH_INSERT(lock_t, hash, lock_sys->rec_hash, rec_fold, lock);
} else {
lock_rec_insert_to_head(lock, rec_fold);
}
} else {
HASH_INSERT(lock_t, hash, lock_sys->rec_hash, rec_fold, lock);
}
}
#else
HASH_INSERT(lock_t, hash, lock_sys->rec_hash,
lock_rec_fold(space, page_no), lock);
if (innodb_lock_schedule_algorithm == INNODB_LOCK_SCHEDULE_ALGORITHM_VATS
&& !is_slave_replication) {
if (type_mode & LOCK_WAIT) {
HASH_INSERT(lock_t, hash, lock_sys->rec_hash, rec_fold, lock);
} else {
lock_rec_insert_to_head(lock, rec_fold);
}
} else {
HASH_INSERT(lock_t, hash, lock_sys->rec_hash, rec_fold, lock);
}
#endif /* WITH_WSREP */
lock_sys->rec_num++;
......@@ -2203,85 +2372,6 @@ lock_rec_create(
return(lock);
}
/*********************************************************************//**
Check if lock1 has higher priority than lock2.
The rules are applied in this order:
1. NULL has lowest priority: a NULL lock1 never wins, and a non-NULL lock1
always wins against a NULL lock2.
2. If thd_deadlock_victim_preference() reports -1 (lock1's transaction is
the preferred victim) lock1 loses; if it reports 1, lock1 wins.
3. If lock1 is not a wait lock it wins; otherwise, if lock2 is not a wait
lock, lock1 loses.
4. Otherwise the lock whose transaction has the smaller start_time wins.
@returns true if lock1 has higher priority, false otherwise. */
bool
has_higher_priority(
	lock_t	*lock1,
	lock_t	*lock2)
{
	if (lock1 == NULL) {
		return false;
	}
	if (lock2 == NULL) {
		return true;
	}

	/* Ask the upper server layer whether one of the two transactions
	should be preferred as a deadlock victim. */
	const int	victim = thd_deadlock_victim_preference(
		lock1->trx->mysql_thd, lock2->trx->mysql_thd);

	if (victim == -1) {
		/* lock1 is the preferred victim, so lock2 ranks higher. */
		return false;
	}
	if (victim == 1) {
		/* lock2 is the preferred victim, so lock1 ranks higher. */
		return true;
	}

	/* No preference: compare by wait mode, then by transaction age. */
	if (!lock_get_wait(lock1)) {
		return true;
	}

	return lock_get_wait(lock2)
		&& lock1->trx->start_time < lock2->trx->start_time;
}
/*********************************************************************//**
Insert a record lock into its hash chain according to the wait flag passed
by the caller and the priority reported by has_higher_priority().
The lock becomes the head of the chain when the chain is empty, when wait
is false, or when the lock outranks the current head. Otherwise it is
linked in front of the first chain entry that does not outrank it. */
static
void
lock_rec_insert_by_trx_age(
	lock_t	*in_lock,	/*!< in: lock to be inserted */
	bool	wait)		/*!< in: whether it's a wait lock */
{
	const ulint	fold = lock_rec_fold(
		in_lock->un_member.rec_lock.space,
		in_lock->un_member.rec_lock.page_no);
	hash_table_t*	table = lock_hash_get(in_lock->type_mode);
	hash_cell_t*	bucket = hash_get_nth_cell(
		table, hash_calc_hash(fold, table));
	lock_t*		cursor = (lock_t *) bucket->node;

	/* Head insertion: empty chain, non-wait request, or in_lock
	outranks the current head. */
	if (cursor == NULL || !wait || has_higher_priority(in_lock, cursor)) {
		in_lock->hash = cursor;
		bucket->node = in_lock;
		return;
	}

	/* Walk forward while the successor still outranks in_lock. A NULL
	successor never outranks anything, so cursor is never NULL here. */
	for (;;) {
		lock_t*	successor = (lock_t *) cursor->hash;

		if (!has_higher_priority(successor, in_lock)) {
			in_lock->hash = successor;
			cursor->hash = in_lock;
			return;
		}

		cursor = successor;
	}
}
/*********************************************************************//**
Enqueues a waiting request for a lock which cannot be granted immediately.
Checks for deadlocks.
......@@ -2386,54 +2476,58 @@ lock_rec_enqueue_waiting(
}
// Move it only when it does not cause a deadlock.
if (innodb_lock_schedule_algorithm
== INNODB_LOCK_SCHEDULE_ALGORITHM_VATS
&& !trx_is_high_priority(lock->trx)) {
HASH_DELETE(lock_t, hash, lock_hash_get(lock->type_mode),
m_rec_id.fold(), lock);
lock_rec_insert_by_trx_age(lock, m_mode & LOCK_WAIT);
if (lock_get_wait(lock) && !lock_rec_has_to_wait_in_queue(lock)) {
lock_reset_lock_and_trx_wait(lock);
return DB_SUCCESS_LOCKED_REC;
}
}
dberr_t result;
if (trx->lock.wait_lock == NULL) {
/* If there was a deadlock but we chose another
transaction as a victim, it is possible that we
already have the lock now granted! */
return(DB_SUCCESS_LOCKED_REC);
}
trx->lock.que_state = TRX_QUE_LOCK_WAIT;
result = DB_SUCCESS_LOCKED_REC;
} else {
trx->lock.que_state = TRX_QUE_LOCK_WAIT;
trx->lock.was_chosen_as_deadlock_victim = FALSE;
trx->lock.wait_started = ut_time();
trx->lock.was_chosen_as_deadlock_victim = FALSE;
trx->lock.wait_started = ut_time();
if (UNIV_UNLIKELY(trx->take_stats)) {
ut_usectime(&sec, &ms);
trx->lock_que_wait_ustarted = (ib_uint64_t)sec * 1000000 + ms;
}
if (UNIV_UNLIKELY(trx->take_stats)) {
ut_usectime(&sec, &ms);
trx->lock_que_wait_ustarted = (ib_uint64_t)sec * 1000000 + ms;
}
ut_a(que_thr_stop(thr));
ut_a(que_thr_stop(thr));
#ifdef UNIV_DEBUG
if (lock_print_waits) {
fprintf(stderr, "Lock wait for trx " TRX_ID_FMT " in index ",
trx->id);
ut_print_name(stderr, trx, FALSE, index->name);
}
if (lock_print_waits) {
fprintf(stderr, "Lock wait for trx " TRX_ID_FMT " in index ",
trx->id);
ut_print_name(stderr, trx, FALSE, index->name);
}
#endif /* UNIV_DEBUG */
MONITOR_INC(MONITOR_LOCKREC_WAIT);
MONITOR_INC(MONITOR_LOCKREC_WAIT);
trx->n_rec_lock_waits++;
trx->n_rec_lock_waits++;
return(DB_LOCK_WAIT);
result = DB_LOCK_WAIT;
}
// Move it only when it does not cause a deadlock.
// Under VATS (and not on a replication slave) the lock is unlinked from its
// hash chain and re-inserted by lock_rec_insert_by_trx_age().
if (innodb_lock_schedule_algorithm
    == INNODB_LOCK_SCHEDULE_ALGORITHM_VATS
    && !is_slave_replication) {
	// The fold must be computed from the lock's own (space, page_no) so
	// that HASH_DELETE looks in the chain the lock was inserted into.
	HASH_DELETE(lock_t, hash, lock_sys->rec_hash,
		    lock_rec_fold(lock->un_member.rec_lock.space,
				  lock->un_member.rec_lock.page_no), lock);
	lock_rec_insert_by_trx_age(lock);
	// At its new position the lock may no longer have to wait for
	// anything ahead of it; grant it right away in that case. `true'
	// tells lock_grant() that lock->trx->mutex is already owned.
	if (lock_get_wait(lock) && !lock_rec_has_to_wait_in_queue(lock)) {
		lock_grant(lock, true);
		result = DB_SUCCESS_LOCKED_REC;
	}
}
return(result);
}
/*********************************************************************//**
......@@ -2865,13 +2959,16 @@ static
void
lock_grant(
/*=======*/
lock_t* lock) /*!< in/out: waiting lock request */
lock_t* lock, /*!< in/out: waiting lock request */
bool owns_trx_mutex) /*!< in: whether lock->trx->mutex is owned */
{
ut_ad(lock_mutex_own());
lock_reset_lock_and_trx_wait(lock);
trx_mutex_enter(lock->trx);
if (!owns_trx_mutex) {
trx_mutex_enter(lock->trx);
}
if (lock_get_mode(lock) == LOCK_AUTO_INC) {
dict_table_t* table = lock->un_member.tab_lock.table;
......@@ -2920,7 +3017,9 @@ lock_grant(
lock->wait_time = (ulint)difftime(ut_time(), lock->requested_time);
trx_mutex_exit(lock->trx);
if (!owns_trx_mutex) {
trx_mutex_exit(lock->trx);
}
}
/*************************************************************//**
......@@ -2958,30 +3057,47 @@ lock_rec_cancel(
trx_mutex_exit(lock->trx);
}
/*************************************************************//**
Move the lock to the head of the hash list. */
static
void
lock_rec_move_to_front(
lock_t *lock_to_move, /*!< in: lock to be moved */
ulint rec_fold) /*!< in: rec fold of the lock */
lock_grant_and_move_on_page(
ulint space,
ulint page_no)
{
hash_table_t* lock_hash;
hash_cell_t* cell;
lock_t* next;
lock_t* lock;
lock_t* previous;
ulint rec_fold = lock_rec_fold(space, page_no);
if (lock_to_move != NULL)
{
lock_hash = lock_hash_get(lock_to_move->type_mode);
// Move the target lock to the head of the list
cell = hash_get_nth_cell(lock_hash,
hash_calc_hash(rec_fold, lock_hash));
if (lock_to_move != cell->node) {
next = (lock_t *) cell->node;
cell->node = lock_to_move;
lock_to_move->hash = next;
}
}
previous = (lock_t *) hash_get_nth_cell(lock_sys->rec_hash,
hash_calc_hash(rec_fold, lock_sys->rec_hash))->node;
/* Grant locks if there are no conflicting locks ahead.
Move granted locks to the head of the list. */
for (lock = lock_rec_get_first_on_page_addr(lock_sys->rec_hash, space,
page_no);
lock != NULL;) {
/* If the lock is a wait lock on this page, and it does not need to wait. */
if ((lock->un_member.rec_lock.space == space)
&& (lock->un_member.rec_lock.page_no == page_no)
&& lock_get_wait(lock)
&& !lock_rec_has_to_wait_in_queue(lock)) {
lock_grant(lock, false);
if (previous != NULL) {
/* Move the lock to the head of the list. */
HASH_GET_NEXT(hash, previous) = HASH_GET_NEXT(hash, lock);
lock_rec_insert_to_head(lock, rec_fold);
} else {
/* Already at the head of the list. */
previous = lock;
}
/* Move on to the next lock. */
lock = static_cast<lock_t *>(HASH_GET_NEXT(hash, previous));
} else {
previous = lock;
lock = static_cast<lock_t *>(HASH_GET_NEXT(hash, lock));
}
}
}
/*************************************************************//**
......@@ -3028,7 +3144,7 @@ lock_rec_dequeue_from_page(
MONITOR_DEC(MONITOR_NUM_RECLOCK);
if (innodb_lock_schedule_algorithm
== INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS) {
== INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS || is_slave_replication) {
/* Check if waiting locks in the queue can now be granted:
grant locks if there are no conflicting locks ahead. Stop at
......@@ -3045,41 +3161,11 @@ lock_rec_dequeue_from_page(
/* Grant the lock */
ut_ad(lock->trx != in_lock->trx);
lock_grant(lock);
lock_grant(lock, false);
}
}
} else {
/* Grant locks if there are no conflicting locks ahead.
Move granted locks to the head of the list. */
for (lock = lock_rec_get_first_on_page_addr(lock_hash, space, page_no);
lock != NULL;) {
/* If the lock is a wait lock on this page, and it does not need to wait. */
if ((lock->un_member.rec_lock.space == space)
&& (lock->un_member.rec_lock.page_no == page_no)
&& lock_get_wait(lock)
&& !lock_rec_has_to_wait_in_queue(lock)) {
/* Grant the lock */
ut_ad(lock->trx != in_lock->trx);
lock_grant(lock);
if (previous != NULL) {
/* Move the lock to the head of the list. */
HASH_GET_NEXT(hash, previous) = HASH_GET_NEXT(hash, lock);
lock_rec_move_to_front(lock, rec_fold);
} else {
/* Already at the head of the list. */
previous = lock;
}
/* Move on to the next lock. */
lock = static_cast<lock_t *>(HASH_GET_NEXT(hash, previous));
} else {
previous = lock;
lock = static_cast<lock_t *>(HASH_GET_NEXT(hash, lock));
}
}
lock_grant_and_move_on_page(space, page_no);
}
}
......@@ -5384,12 +5470,59 @@ lock_table_dequeue(
/* Grant the lock */
ut_ad(in_lock->trx != lock->trx);
lock_grant(lock);
lock_grant(lock, false);
}
}
}
/*=========================== LOCK RELEASE ==============================*/
/*************************************************************//**
Grants the waiting locks on record heap_no that no longer have to wait and
moves every newly granted lock to the head of its lock_sys->rec_hash chain.
The scan starts at first_lock and follows the raw hash chain; entries for
other pages or other records are skipped by the filter inside the loop.
first_lock must be linked in the chain selected by its (space, page_no);
if it is not found there, nothing is granted. */
static
void
lock_grant_and_move_on_rec(
	lock_t	*first_lock,	/*!< in: lock to start the scan from */
	ulint	heap_no)	/*!< in: heap number of the record */
{
	lock_t*		lock;
	lock_t*		previous;
	const ulint	space = first_lock->un_member.rec_lock.space;
	const ulint	page_no = first_lock->un_member.rec_lock.page_no;
	const ulint	rec_fold = lock_rec_fold(space, page_no);

	previous = static_cast<lock_t *>(
		hash_get_nth_cell(lock_sys->rec_hash,
				  hash_calc_hash(rec_fold,
						 lock_sys->rec_hash))->node);

	if (previous == NULL) {
		/* Empty chain: nothing to grant. */
		return;
	}

	/* `previous' has to be the real chain predecessor of the lock under
	examination. Starting with the chain head while first_lock sits
	further down would make the unlink below drop every lock between
	the head and first_lock. */
	if (previous == first_lock) {
		lock = previous;
	} else {
		while (HASH_GET_NEXT(hash, previous) != NULL
		       && HASH_GET_NEXT(hash, previous) != first_lock) {
			previous = static_cast<lock_t *>(
				HASH_GET_NEXT(hash, previous));
		}

		lock = static_cast<lock_t *>(HASH_GET_NEXT(hash, previous));
	}

	/* Grant locks if there are no conflicting locks ahead.
	Move granted locks to the head of the list. */
	while (lock != NULL) {
		/* The lock is a wait lock on this record and it does not
		need to wait any more. */
		if (lock->un_member.rec_lock.space == space
		    && lock->un_member.rec_lock.page_no == page_no
		    && lock_rec_get_nth_bit(lock, heap_no)
		    && lock_get_wait(lock)
		    && !lock_rec_has_to_wait_in_queue(lock)) {

			lock_grant(lock, false);

			/* Unlink the lock and put it in front. When the lock
			already is the chain head, previous == lock and both
			steps leave the chain unchanged. */
			HASH_GET_NEXT(hash, previous) = HASH_GET_NEXT(hash, lock);
			lock_rec_insert_to_head(lock, rec_fold);

			/* Move on to the lock that followed the granted one;
			`previous' still precedes it. */
			lock = static_cast<lock_t *>(
				HASH_GET_NEXT(hash, previous));
		} else {
			previous = lock;
			lock = static_cast<lock_t *>(HASH_GET_NEXT(hash, lock));
		}
	}
}
/*************************************************************//**
Removes a granted record lock of a transaction from the queue and grants
......@@ -5452,19 +5585,25 @@ lock_rec_unlock(
released:
ut_a(!lock_get_wait(lock));
lock_rec_reset_nth_bit(lock, heap_no);
if (innodb_lock_schedule_algorithm
== INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS || is_slave_replication) {
/* Check if we can now grant waiting lock requests */
/* Check if we can now grant waiting lock requests */
for (lock = first_lock; lock != NULL;
lock = lock_rec_get_next(heap_no, lock)) {
if (lock_get_wait(lock)
&& !lock_rec_has_to_wait_in_queue(lock)) {
for (lock = first_lock; lock != NULL;
lock = lock_rec_get_next(heap_no, lock)) {
if (lock_get_wait(lock)
&& !lock_rec_has_to_wait_in_queue(lock)) {
/* Grant the lock */
ut_ad(trx != lock->trx);
lock_grant(lock);
}
}
/* Grant the lock */
ut_ad(trx != lock->trx);
lock_grant(lock, false);
}
}
} else {
lock_grant_and_move_on_rec(first_lock, heap_no);
}
lock_mutex_exit();
trx_mutex_exit(trx);
......@@ -6517,6 +6656,9 @@ lock_rec_queue_validate(
}
}
ut_ad(innodb_lock_schedule_algorithm == INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS ||
lock_queue_validate(lock));
func_exit:
if (!locked_lock_trx_sys) {
lock_mutex_exit();
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment