Commit 87abc870 authored by sensssz's avatar sensssz

Implement VATS in XtraDB and InnoDB.

parent 5058ced5
...@@ -72,6 +72,7 @@ ...@@ -72,6 +72,7 @@
#include <sys/syscall.h> #include <sys/syscall.h>
#endif #endif
bool is_slave_replication = false;
/* /*
The following is used to initialise Table_ident with a internal The following is used to initialise Table_ident with a internal
table name table name
......
...@@ -152,6 +152,8 @@ extern MYSQL_PLUGIN_IMPORT const char **errmesg; ...@@ -152,6 +152,8 @@ extern MYSQL_PLUGIN_IMPORT const char **errmesg;
extern bool volatile shutdown_in_progress; extern bool volatile shutdown_in_progress;
extern bool is_slave_replication;
extern "C" LEX_STRING * thd_query_string (MYSQL_THD thd); extern "C" LEX_STRING * thd_query_string (MYSQL_THD thd);
extern "C" char **thd_query(MYSQL_THD thd); extern "C" char **thd_query(MYSQL_THD thd);
......
...@@ -379,6 +379,22 @@ static TYPELIB innodb_default_row_format_typelib = { ...@@ -379,6 +379,22 @@ static TYPELIB innodb_default_row_format_typelib = {
NULL NULL
}; };
/** Possible values of the parameter innodb_lock_schedule_algorithm */
static const char* innodb_lock_schedule_algorithm_names[] = {
"fcfs",
"vats",
NullS
};
/** Used to define an enumerate type of the system variable
innodb_lock_schedule_algorithm. */
static TYPELIB innodb_lock_schedule_algorithm_typelib = {
array_elements(innodb_lock_schedule_algorithm_names) - 1,
"innodb_lock_schedule_algorithm_typelib",
innodb_lock_schedule_algorithm_names,
NULL
};
/* The following counter is used to convey information to InnoDB /* The following counter is used to convey information to InnoDB
about server activity: in case of normal DML ops it is not about server activity: in case of normal DML ops it is not
sensible to call srv_active_wake_master_thread after each sensible to call srv_active_wake_master_thread after each
...@@ -22784,6 +22800,18 @@ static MYSQL_SYSVAR_ULONG(doublewrite_batch_size, srv_doublewrite_batch_size, ...@@ -22784,6 +22800,18 @@ static MYSQL_SYSVAR_ULONG(doublewrite_batch_size, srv_doublewrite_batch_size,
NULL, NULL, 120, 1, 127, 0); NULL, NULL, 120, 1, 127, 0);
#endif /* defined UNIV_DEBUG || defined UNIV_PERF_DEBUG */ #endif /* defined UNIV_DEBUG || defined UNIV_PERF_DEBUG */
static MYSQL_SYSVAR_ENUM(lock_schedule_algorithm, innodb_lock_schedule_algorithm,
PLUGIN_VAR_RQCMDARG,
"The algorithm Innodb uses for deciding which locks to grant next when"
" a lock is released. Possible values are"
" FCFS"
" grant the locks in First-Come-First-Served order;"
" VATS"
" use the Variance-Aware-Transaction-Scheduling algorithm, which"
" uses an Eldest-Transaction-First heuristic.",
NULL, NULL, INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS,
&innodb_lock_schedule_algorithm_typelib);
static MYSQL_SYSVAR_ULONG(buffer_pool_instances, srv_buf_pool_instances, static MYSQL_SYSVAR_ULONG(buffer_pool_instances, srv_buf_pool_instances,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"Number of buffer pool instances, set to higher value on high-end machines to increase scalability", "Number of buffer pool instances, set to higher value on high-end machines to increase scalability",
...@@ -23657,6 +23685,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { ...@@ -23657,6 +23685,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(ft_sort_pll_degree), MYSQL_SYSVAR(ft_sort_pll_degree),
MYSQL_SYSVAR(large_prefix), MYSQL_SYSVAR(large_prefix),
MYSQL_SYSVAR(force_load_corrupted), MYSQL_SYSVAR(force_load_corrupted),
MYSQL_SYSVAR(lock_schedule_algorithm),
MYSQL_SYSVAR(locks_unsafe_for_binlog), MYSQL_SYSVAR(locks_unsafe_for_binlog),
MYSQL_SYSVAR(lock_wait_timeout), MYSQL_SYSVAR(lock_wait_timeout),
MYSQL_SYSVAR(page_size), MYSQL_SYSVAR(page_size),
......
...@@ -40,6 +40,17 @@ Created 5/7/1996 Heikki Tuuri ...@@ -40,6 +40,17 @@ Created 5/7/1996 Heikki Tuuri
#include "gis0rtree.h" #include "gis0rtree.h"
#include "lock0prdt.h" #include "lock0prdt.h"
/** Alternatives for innodb_lock_schedule_algorithm, which can be changed by
setting innodb_lock_schedule_algorithm. */
enum innodb_lock_schedule_algorithm_t {
/*!< First Come First Served */
INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS,
/*!< Variance-Aware-Transaction-Scheduling */
INNODB_LOCK_SCHEDULE_ALGORITHM_VATS
};
extern ulong innodb_lock_schedule_algorithm;
// Forward declaration // Forward declaration
class ReadView; class ReadView;
......
...@@ -30,6 +30,7 @@ Created 5/7/1996 Heikki Tuuri ...@@ -30,6 +30,7 @@ Created 5/7/1996 Heikki Tuuri
#include "ha_prototypes.h" #include "ha_prototypes.h"
#include <mysql/service_thd_error_context.h> #include <mysql/service_thd_error_context.h>
#include <sql_class.h>
#include "lock0lock.h" #include "lock0lock.h"
#include "lock0priv.h" #include "lock0priv.h"
...@@ -58,6 +59,9 @@ Created 5/7/1996 Heikki Tuuri ...@@ -58,6 +59,9 @@ Created 5/7/1996 Heikki Tuuri
#include <mysql/service_wsrep.h> #include <mysql/service_wsrep.h>
#endif /* WITH_WSREP */ #endif /* WITH_WSREP */
/** Lock scheduling algorithm */
ulong innodb_lock_schedule_algorithm = INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS;
/** Total number of cached record locks */ /** Total number of cached record locks */
static const ulint REC_LOCK_CACHE = 8; static const ulint REC_LOCK_CACHE = 8;
...@@ -70,6 +74,15 @@ static const ulint TABLE_LOCK_CACHE = 8; ...@@ -70,6 +74,15 @@ static const ulint TABLE_LOCK_CACHE = 8;
/** Size in bytes, of the table lock instance */ /** Size in bytes, of the table lock instance */
static const ulint TABLE_LOCK_SIZE = sizeof(ib_lock_t); static const ulint TABLE_LOCK_SIZE = sizeof(ib_lock_t);
/*********************************************************************//**
Checks if a waiting record lock request still has to wait in a queue.
@return lock that is causing the wait */
static
const lock_t*
lock_rec_has_to_wait_in_queue(
/*==========================*/
const lock_t* wait_lock); /*!< in: waiting record lock */
/* Buffer to collect THDs to report waits for. */ /* Buffer to collect THDs to report waits for. */
struct thd_wait_reports { struct thd_wait_reports {
struct thd_wait_reports *next; /*!< List link */ struct thd_wait_reports *next; /*!< List link */
...@@ -81,6 +94,9 @@ extern "C" void thd_report_wait_for(MYSQL_THD thd, MYSQL_THD other_thd); ...@@ -81,6 +94,9 @@ extern "C" void thd_report_wait_for(MYSQL_THD thd, MYSQL_THD other_thd);
extern "C" int thd_need_wait_for(const MYSQL_THD thd); extern "C" int thd_need_wait_for(const MYSQL_THD thd);
extern "C" int thd_need_ordering_with(const MYSQL_THD thd, const MYSQL_THD other_thd); extern "C" int thd_need_ordering_with(const MYSQL_THD thd, const MYSQL_THD other_thd);
extern "C" int thd_deadlock_victim_preference(const MYSQL_THD thd1, const MYSQL_THD thd2);
/** Deadlock checker. */ /** Deadlock checker. */
class DeadlockChecker { class DeadlockChecker {
public: public:
...@@ -1881,6 +1897,85 @@ RecLock::create( ...@@ -1881,6 +1897,85 @@ RecLock::create(
return(lock); return(lock);
} }
/*********************************************************************//**
Check if lock1 has higher priority than lock2.
NULL has lowest priority.
If either is a high priority transaction, the lock has higher priority.
If neither of them is wait lock, the first one has higher priority.
If only one of them is a wait lock, it has lower priority.
Otherwise, the one with an older transaction has higher priority.
@returns true if lock1 has higher priority, false otherwise. */
bool
has_higher_priority(
lock_t *lock1,
lock_t *lock2)
{
if (lock1 == NULL) {
return false;
} else if (lock2 == NULL) {
return true;
}
// Ask the upper server layer if any of the two trx should be prefered.
int preference = thd_deadlock_victim_preference(lock1->trx->mysql_thd, lock2->trx->mysql_thd);
if (preference == -1) {
// lock1 is preferred as a victim, so lock2 has higher priority
return false;
} else if (preference == 1) {
// lock2 is preferred as a victim, so lock1 has higher priority
return true;
}
// No preference. Compre them by wait mode and trx age.
if (!lock_get_wait(lock1)) {
return true;
} else if (!lock_get_wait(lock2)) {
return false;
}
return lock1->trx->start_time < lock2->trx->start_time;
}
/*********************************************************************//**
Insert a lock to the hash list according to the mode (whether it is a wait
lock) and the age of the transaction the it is associated with.
If the lock is not a wait lock, insert it to the head of the hash list.
Otherwise, insert it to the middle of the wait locks according to the age of
the transaciton. */
static
void
lock_rec_insert_by_trx_age(
lock_t *in_lock, /*!< in: lock to be insert */
bool wait) /*!< in: whether it's a wait lock */
{
ulint space;
ulint page_no;
ulint rec_fold;
hash_table_t* hash;
hash_cell_t* cell;
lock_t* node;
lock_t* next;
space = in_lock->un_member.rec_lock.space;
page_no = in_lock->un_member.rec_lock.page_no;
rec_fold = lock_rec_fold(space, page_no);
hash = lock_hash_get(in_lock->type_mode);
cell = hash_get_nth_cell(hash,
hash_calc_hash(rec_fold, hash));
node = (lock_t *) cell->node;
// If in_lock is not a wait lock, we insert it to the head of the list.
if (node == NULL || !wait || has_higher_priority(in_lock, node)) {
cell->node = in_lock;
in_lock->hash = node;
return;
}
while (node != NULL && has_higher_priority((lock_t *) node->hash,
in_lock)) {
node = (lock_t *) node->hash;
}
next = (lock_t *) node->hash;
node->hash = in_lock;
in_lock->hash = next;
}
/** /**
Check the outcome of the deadlock check Check the outcome of the deadlock check
@param[in,out] victim_trx Transaction selected for rollback @param[in,out] victim_trx Transaction selected for rollback
...@@ -1903,7 +1998,23 @@ RecLock::check_deadlock_result(const trx_t* victim_trx, lock_t* lock) ...@@ -1903,7 +1998,23 @@ RecLock::check_deadlock_result(const trx_t* victim_trx, lock_t* lock)
return(DB_DEADLOCK); return(DB_DEADLOCK);
} else if (m_trx->lock.wait_lock == NULL) { }
// Move it only when it does not cause a deadlock.
if (innodb_lock_schedule_algorithm
== INNODB_LOCK_SCHEDULE_ALGORITHM_VATS
&& !trx_is_high_priority(lock->trx)) {
HASH_DELETE(lock_t, hash, lock_hash_get(lock->type_mode),
m_rec_id.fold(), lock);
lock_rec_insert_by_trx_age(lock, m_mode & LOCK_WAIT);
if (lock_get_wait(lock) && !lock_rec_has_to_wait_in_queue(lock)) {
lock_reset_lock_and_trx_wait(lock);
return DB_SUCCESS_LOCKED_REC;
}
}
if (m_trx->lock.wait_lock == NULL) {
/* If there was a deadlock but we chose another /* If there was a deadlock but we chose another
transaction as a victim, it is possible that we transaction as a victim, it is possible that we
...@@ -2799,6 +2910,32 @@ lock_rec_cancel( ...@@ -2799,6 +2910,32 @@ lock_rec_cancel(
trx_mutex_exit(lock->trx); trx_mutex_exit(lock->trx);
} }
/*************************************************************//**
Move the lock to the head of the hash list. */
static
void
lock_rec_move_to_front(
lock_t *lock_to_move, /*!< in: lock to be moved */
ulint rec_fold) /*!< in: rec fold of the lock */
{
hash_table_t* lock_hash;
hash_cell_t* cell;
lock_t* next;
if (lock_to_move != NULL)
{
lock_hash = lock_hash_get(lock_to_move->type_mode);
// Move the target lock to the head of the list
cell = hash_get_nth_cell(lock_hash,
hash_calc_hash(rec_fold, lock_hash));
if (lock_to_move != cell->node) {
next = (lock_t *) cell->node;
cell->node = lock_to_move;
lock_to_move->hash = next;
}
}
}
/*************************************************************//** /*************************************************************//**
Removes a record lock request, waiting or granted, from the queue and Removes a record lock request, waiting or granted, from the queue and
grants locks to other transactions in the queue if they now are entitled grants locks to other transactions in the queue if they now are entitled
...@@ -2815,6 +2952,8 @@ lock_rec_dequeue_from_page( ...@@ -2815,6 +2952,8 @@ lock_rec_dequeue_from_page(
{ {
ulint space; ulint space;
ulint page_no; ulint page_no;
ulint rec_fold;
lock_t* previous = NULL;
lock_t* lock; lock_t* lock;
trx_lock_t* trx_lock; trx_lock_t* trx_lock;
hash_table_t* lock_hash; hash_table_t* lock_hash;
...@@ -2827,6 +2966,7 @@ lock_rec_dequeue_from_page( ...@@ -2827,6 +2966,7 @@ lock_rec_dequeue_from_page(
space = in_lock->un_member.rec_lock.space; space = in_lock->un_member.rec_lock.space;
page_no = in_lock->un_member.rec_lock.page_no; page_no = in_lock->un_member.rec_lock.page_no;
rec_fold = lock_rec_fold(space, page_no);
ut_ad(in_lock->index->table->n_rec_locks > 0); ut_ad(in_lock->index->table->n_rec_locks > 0);
in_lock->index->table->n_rec_locks--; in_lock->index->table->n_rec_locks--;
...@@ -2841,11 +2981,15 @@ lock_rec_dequeue_from_page( ...@@ -2841,11 +2981,15 @@ lock_rec_dequeue_from_page(
MONITOR_INC(MONITOR_RECLOCK_REMOVED); MONITOR_INC(MONITOR_RECLOCK_REMOVED);
MONITOR_DEC(MONITOR_NUM_RECLOCK); MONITOR_DEC(MONITOR_NUM_RECLOCK);
/* Check if waiting locks in the queue can now be granted: grant if (innodb_lock_schedule_algorithm
locks if there are no conflicting locks ahead. Stop at the first == INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS) {
X lock that is waiting or has been granted. */
for (lock = lock_rec_get_first_on_page_addr(lock_hash, space, page_no); /* Check if waiting locks in the queue can now be granted:
grant locks if there are no conflicting locks ahead. Stop at
the first X lock that is waiting or has been granted. */
for (lock = lock_rec_get_first_on_page_addr(lock_hash, space,
page_no);
lock != NULL; lock != NULL;
lock = lock_rec_get_next_on_page(lock)) { lock = lock_rec_get_next_on_page(lock)) {
...@@ -2871,6 +3015,52 @@ lock_rec_dequeue_from_page( ...@@ -2871,6 +3015,52 @@ lock_rec_dequeue_from_page(
} }
} }
} }
} else {
/* Grant locks if there are no conflicting locks ahead.
Move granted locks to the head of the list. */
for (lock = lock_rec_get_first_on_page_addr(lock_hash, space, page_no);
lock != NULL;) {
/* If the lock is a wait lock on this page, and it does not need to wait. */
if ((lock->un_member.rec_lock.space == space)
&& (lock->un_member.rec_lock.page_no == page_no)
&& lock_get_wait(lock)
&& !lock_rec_has_to_wait_in_queue(lock)) {
/* Grant the lock */
ut_ad(lock->trx != in_lock->trx);
bool exit_trx_mutex = false;
if (lock->trx->abort_type != TRX_SERVER_ABORT) {
ut_ad(trx_mutex_own(lock->trx));
trx_mutex_exit(lock->trx);
exit_trx_mutex = true;
}
lock_grant(lock);
if (exit_trx_mutex) {
ut_ad(!trx_mutex_own(lock->trx));
trx_mutex_enter(lock->trx);
}
if (previous != NULL) {
/* Move the lock to the head of the list. */
HASH_GET_NEXT(hash, previous) = HASH_GET_NEXT(hash, lock);
lock_rec_move_to_front(lock, rec_fold);
} else {
/* Already at the head of the list. */
previous = lock;
}
/* Move on to the next lock. */
lock = static_cast<lock_t *>(HASH_GET_NEXT(hash, previous));
} else {
previous = lock;
lock = static_cast<lock_t *>(HASH_GET_NEXT(hash, lock));
}
}
}
} }
/*************************************************************//** /*************************************************************//**
...@@ -7799,7 +7989,9 @@ DeadlockChecker::get_first_lock(ulint* heap_no) const ...@@ -7799,7 +7989,9 @@ DeadlockChecker::get_first_lock(ulint* heap_no) const
/* Must find at least two locks, otherwise there cannot be a /* Must find at least two locks, otherwise there cannot be a
waiting lock, secondly the first lock cannot be the wait_lock. */ waiting lock, secondly the first lock cannot be the wait_lock. */
ut_a(lock != NULL); ut_a(lock != NULL);
ut_a(lock != m_wait_lock); ut_a(lock != m_wait_lock ||
(innodb_lock_schedule_algorithm
== INNODB_LOCK_SCHEDULE_ALGORITHM_VATS));
/* Check that the lock type doesn't change. */ /* Check that the lock type doesn't change. */
ut_ad(lock_get_type_low(lock) == lock_get_type_low(m_wait_lock)); ut_ad(lock_get_type_low(lock) == lock_get_type_low(m_wait_lock));
......
...@@ -348,6 +348,22 @@ static TYPELIB innodb_empty_free_list_algorithm_typelib = { ...@@ -348,6 +348,22 @@ static TYPELIB innodb_empty_free_list_algorithm_typelib = {
NULL NULL
}; };
/** Possible values of the parameter innodb_lock_schedule_algorithm */
static const char* innodb_lock_schedule_algorithm_names[] = {
"fcfs",
"vats",
NullS
};
/** Used to define an enumerate type of the system variable
innodb_lock_schedule_algorithm. */
static TYPELIB innodb_lock_schedule_algorithm_typelib = {
array_elements(innodb_lock_schedule_algorithm_names) - 1,
"innodb_lock_schedule_algorithm_typelib",
innodb_lock_schedule_algorithm_names,
NULL
};
/* The following counter is used to convey information to InnoDB /* The following counter is used to convey information to InnoDB
about server activity: in case of normal DML ops it is not about server activity: in case of normal DML ops it is not
sensible to call srv_active_wake_master_thread after each sensible to call srv_active_wake_master_thread after each
...@@ -20392,6 +20408,18 @@ static MYSQL_SYSVAR_ENUM(empty_free_list_algorithm, ...@@ -20392,6 +20408,18 @@ static MYSQL_SYSVAR_ENUM(empty_free_list_algorithm,
innodb_srv_empty_free_list_algorithm_validate, NULL, SRV_EMPTY_FREE_LIST_BACKOFF, innodb_srv_empty_free_list_algorithm_validate, NULL, SRV_EMPTY_FREE_LIST_BACKOFF,
&innodb_empty_free_list_algorithm_typelib); &innodb_empty_free_list_algorithm_typelib);
static MYSQL_SYSVAR_ENUM(lock_schedule_algorithm, innodb_lock_schedule_algorithm,
PLUGIN_VAR_RQCMDARG,
"The algorithm Innodb uses for deciding which locks to grant next when"
" a lock is released. Possible values are"
" FCFS"
" grant the locks in First-Come-First-Served order;"
" VATS"
" use the Variance-Aware-Transaction-Scheduling algorithm, which"
" uses an Eldest-Transaction-First heuristic.",
NULL, NULL, INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS,
&innodb_lock_schedule_algorithm_typelib);
static MYSQL_SYSVAR_LONG(buffer_pool_instances, innobase_buffer_pool_instances, static MYSQL_SYSVAR_LONG(buffer_pool_instances, innobase_buffer_pool_instances,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"Number of buffer pool instances, set to higher value on high-end machines to increase scalability", "Number of buffer pool instances, set to higher value on high-end machines to increase scalability",
...@@ -21285,6 +21313,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { ...@@ -21285,6 +21313,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(ft_sort_pll_degree), MYSQL_SYSVAR(ft_sort_pll_degree),
MYSQL_SYSVAR(large_prefix), MYSQL_SYSVAR(large_prefix),
MYSQL_SYSVAR(force_load_corrupted), MYSQL_SYSVAR(force_load_corrupted),
MYSQL_SYSVAR(lock_schedule_algorithm),
MYSQL_SYSVAR(locks_unsafe_for_binlog), MYSQL_SYSVAR(locks_unsafe_for_binlog),
MYSQL_SYSVAR(lock_wait_timeout), MYSQL_SYSVAR(lock_wait_timeout),
#ifdef UNIV_LOG_ARCHIVE #ifdef UNIV_LOG_ARCHIVE
......
...@@ -47,6 +47,17 @@ extern ibool lock_print_waits; ...@@ -47,6 +47,17 @@ extern ibool lock_print_waits;
extern ulint srv_n_lock_deadlock_count; extern ulint srv_n_lock_deadlock_count;
/** Alternatives for innodb_lock_schedule_algorithm, which can be changed by
setting innodb_lock_schedule_algorithm. */
enum innodb_lock_schedule_algorithm_t {
/*!< First Come First Served */
INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS,
/*!< Variance-Aware-Transaction-Scheduling */
INNODB_LOCK_SCHEDULE_ALGORITHM_VATS
};
extern ulong innodb_lock_schedule_algorithm;
/*********************************************************************//** /*********************************************************************//**
Gets the size of a lock struct. Gets the size of a lock struct.
@return size in bytes */ @return size in bytes */
......
...@@ -385,6 +385,11 @@ extern "C" int thd_need_wait_for(const MYSQL_THD thd); ...@@ -385,6 +385,11 @@ extern "C" int thd_need_wait_for(const MYSQL_THD thd);
extern "C" extern "C"
int thd_need_ordering_with(const MYSQL_THD thd, const MYSQL_THD other_thd); int thd_need_ordering_with(const MYSQL_THD thd, const MYSQL_THD other_thd);
/** Lock scheduling algorithm */
ulong innodb_lock_schedule_algorithm = INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS;
extern "C" int thd_deadlock_victim_preference(const MYSQL_THD thd1, const MYSQL_THD thd2);
/** Stack to use during DFS search. Currently only a single stack is required /** Stack to use during DFS search. Currently only a single stack is required
because there is no parallel deadlock check. This stack is protected by because there is no parallel deadlock check. This stack is protected by
the lock_sys_t::mutex. */ the lock_sys_t::mutex. */
...@@ -2198,6 +2203,85 @@ lock_rec_create( ...@@ -2198,6 +2203,85 @@ lock_rec_create(
return(lock); return(lock);
} }
/*********************************************************************//**
Check if lock1 has higher priority than lock2.
NULL has lowest priority.
If either is a high priority transaction, the lock has higher priority.
If neither of them is wait lock, the first one has higher priority.
If only one of them is a wait lock, it has lower priority.
Otherwise, the one with an older transaction has higher priority.
@returns true if lock1 has higher priority, false otherwise. */
bool
has_higher_priority(
lock_t *lock1,
lock_t *lock2)
{
if (lock1 == NULL) {
return false;
} else if (lock2 == NULL) {
return true;
}
// Ask the upper server layer if any of the two trx should be prefered.
int preference = thd_deadlock_victim_preference(lock1->trx->mysql_thd, lock2->trx->mysql_thd);
if (preference == -1) {
// lock1 is preferred as a victim, so lock2 has higher priority
return false;
} else if (preference == 1) {
// lock2 is preferred as a victim, so lock1 has higher priority
return true;
}
// No preference. Compre them by wait mode and trx age.
if (!lock_get_wait(lock1)) {
return true;
} else if (!lock_get_wait(lock2)) {
return false;
}
return lock1->trx->start_time < lock2->trx->start_time;
}
/*********************************************************************//**
Insert a lock to the hash list according to the mode (whether it is a wait
lock) and the age of the transaction the it is associated with.
If the lock is not a wait lock, insert it to the head of the hash list.
Otherwise, insert it to the middle of the wait locks according to the age of
the transaciton. */
static
void
lock_rec_insert_by_trx_age(
lock_t *in_lock, /*!< in: lock to be insert */
bool wait) /*!< in: whether it's a wait lock */
{
ulint space;
ulint page_no;
ulint rec_fold;
hash_table_t* hash;
hash_cell_t* cell;
lock_t* node;
lock_t* next;
space = in_lock->un_member.rec_lock.space;
page_no = in_lock->un_member.rec_lock.page_no;
rec_fold = lock_rec_fold(space, page_no);
hash = lock_hash_get(in_lock->type_mode);
cell = hash_get_nth_cell(hash,
hash_calc_hash(rec_fold, hash));
node = (lock_t *) cell->node;
// If in_lock is not a wait lock, we insert it to the head of the list.
if (node == NULL || !wait || has_higher_priority(in_lock, node)) {
cell->node = in_lock;
in_lock->hash = node;
return;
}
while (node != NULL && has_higher_priority((lock_t *) node->hash,
in_lock)) {
node = (lock_t *) node->hash;
}
next = (lock_t *) node->hash;
node->hash = in_lock;
in_lock->hash = next;
}
/*********************************************************************//** /*********************************************************************//**
Enqueues a waiting request for a lock which cannot be granted immediately. Enqueues a waiting request for a lock which cannot be granted immediately.
Checks for deadlocks. Checks for deadlocks.
...@@ -2300,7 +2384,23 @@ lock_rec_enqueue_waiting( ...@@ -2300,7 +2384,23 @@ lock_rec_enqueue_waiting(
return(DB_DEADLOCK); return(DB_DEADLOCK);
} else if (trx->lock.wait_lock == NULL) { }
// Move it only when it does not cause a deadlock.
if (innodb_lock_schedule_algorithm
== INNODB_LOCK_SCHEDULE_ALGORITHM_VATS
&& !trx_is_high_priority(lock->trx)) {
HASH_DELETE(lock_t, hash, lock_hash_get(lock->type_mode),
m_rec_id.fold(), lock);
lock_rec_insert_by_trx_age(lock, m_mode & LOCK_WAIT);
if (lock_get_wait(lock) && !lock_rec_has_to_wait_in_queue(lock)) {
lock_reset_lock_and_trx_wait(lock);
return DB_SUCCESS_LOCKED_REC;
}
}
if (trx->lock.wait_lock == NULL) {
/* If there was a deadlock but we chose another /* If there was a deadlock but we chose another
transaction as a victim, it is possible that we transaction as a victim, it is possible that we
...@@ -2858,6 +2958,32 @@ lock_rec_cancel( ...@@ -2858,6 +2958,32 @@ lock_rec_cancel(
trx_mutex_exit(lock->trx); trx_mutex_exit(lock->trx);
} }
/*************************************************************//**
Move the lock to the head of the hash list. */
static
void
lock_rec_move_to_front(
lock_t *lock_to_move, /*!< in: lock to be moved */
ulint rec_fold) /*!< in: rec fold of the lock */
{
hash_table_t* lock_hash;
hash_cell_t* cell;
lock_t* next;
if (lock_to_move != NULL)
{
lock_hash = lock_hash_get(lock_to_move->type_mode);
// Move the target lock to the head of the list
cell = hash_get_nth_cell(lock_hash,
hash_calc_hash(rec_fold, lock_hash));
if (lock_to_move != cell->node) {
next = (lock_t *) cell->node;
cell->node = lock_to_move;
lock_to_move->hash = next;
}
}
}
/*************************************************************//** /*************************************************************//**
Removes a record lock request, waiting or granted, from the queue and Removes a record lock request, waiting or granted, from the queue and
grants locks to other transactions in the queue if they now are entitled grants locks to other transactions in the queue if they now are entitled
...@@ -2875,6 +3001,8 @@ lock_rec_dequeue_from_page( ...@@ -2875,6 +3001,8 @@ lock_rec_dequeue_from_page(
{ {
ulint space; ulint space;
ulint page_no; ulint page_no;
ulint rec_fold;
lock_t* previous = NULL;
lock_t* lock; lock_t* lock;
trx_lock_t* trx_lock; trx_lock_t* trx_lock;
...@@ -2886,6 +3014,7 @@ lock_rec_dequeue_from_page( ...@@ -2886,6 +3014,7 @@ lock_rec_dequeue_from_page(
space = in_lock->un_member.rec_lock.space; space = in_lock->un_member.rec_lock.space;
page_no = in_lock->un_member.rec_lock.page_no; page_no = in_lock->un_member.rec_lock.page_no;
rec_fold = lock_rec_fold(space, page_no);
in_lock->index->table->n_rec_locks--; in_lock->index->table->n_rec_locks--;
...@@ -2898,11 +3027,15 @@ lock_rec_dequeue_from_page( ...@@ -2898,11 +3027,15 @@ lock_rec_dequeue_from_page(
MONITOR_INC(MONITOR_RECLOCK_REMOVED); MONITOR_INC(MONITOR_RECLOCK_REMOVED);
MONITOR_DEC(MONITOR_NUM_RECLOCK); MONITOR_DEC(MONITOR_NUM_RECLOCK);
/* Check if waiting locks in the queue can now be granted: grant if (innodb_lock_schedule_algorithm
locks if there are no conflicting locks ahead. Stop at the first == INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS) {
X lock that is waiting or has been granted. */
for (lock = lock_rec_get_first_on_page_addr(space, page_no); /* Check if waiting locks in the queue can now be granted:
grant locks if there are no conflicting locks ahead. Stop at
the first X lock that is waiting or has been granted. */
for (lock = lock_rec_get_first_on_page_addr(lock_hash, space,
page_no);
lock != NULL; lock != NULL;
lock = lock_rec_get_next_on_page(lock)) { lock = lock_rec_get_next_on_page(lock)) {
...@@ -2911,9 +3044,43 @@ lock_rec_dequeue_from_page( ...@@ -2911,9 +3044,43 @@ lock_rec_dequeue_from_page(
/* Grant the lock */ /* Grant the lock */
ut_ad(lock->trx != in_lock->trx); ut_ad(lock->trx != in_lock->trx);
lock_grant(lock); lock_grant(lock);
} }
} }
} else {
/* Grant locks if there are no conflicting locks ahead.
Move granted locks to the head of the list. */
for (lock = lock_rec_get_first_on_page_addr(lock_hash, space, page_no);
lock != NULL;) {
/* If the lock is a wait lock on this page, and it does not need to wait. */
if ((lock->un_member.rec_lock.space == space)
&& (lock->un_member.rec_lock.page_no == page_no)
&& lock_get_wait(lock)
&& !lock_rec_has_to_wait_in_queue(lock)) {
/* Grant the lock */
ut_ad(lock->trx != in_lock->trx);
lock_grant(lock);
if (previous != NULL) {
/* Move the lock to the head of the list. */
HASH_GET_NEXT(hash, previous) = HASH_GET_NEXT(hash, lock);
lock_rec_move_to_front(lock, rec_fold);
} else {
/* Already at the head of the list. */
previous = lock;
}
/* Move on to the next lock. */
lock = static_cast<lock_t *>(HASH_GET_NEXT(hash, previous));
} else {
previous = lock;
lock = static_cast<lock_t *>(HASH_GET_NEXT(hash, lock));
}
}
}
} }
/*************************************************************//** /*************************************************************//**
...@@ -4118,7 +4285,8 @@ lock_get_first_lock( ...@@ -4118,7 +4285,8 @@ lock_get_first_lock(
} }
ut_a(lock != NULL); ut_a(lock != NULL);
ut_a(lock != ctx->wait_lock); ut_a(lock != ctx->wait_lock ||
innodb_lock_schedule_algorithm == INNODB_LOCK_SCHEDULE_ALGORITHM_VATS);
ut_ad(lock_get_type_low(lock) == lock_get_type_low(ctx->wait_lock)); ut_ad(lock_get_type_low(lock) == lock_get_type_low(ctx->wait_lock));
return(lock); return(lock);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment