Commit f3e36246 authored by Jan Lindström

MDEV-8869: Potential lock_sys->mutex deadlock

Analysis: We are already holding the lock_sys mutex when we call thd::awake.
This could lead to a mutex deadlock if trx->current_lock_mutex_owner is not
correctly set.

Fix: Make sure that trx->current_lock_mutex_owner is correctly set.
parent 76701c6d
......@@ -4,7 +4,7 @@ Copyright (c) 2000, 2015, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2008, 2009 Google Inc.
Copyright (c) 2009, Percona Inc.
Copyright (c) 2012, Facebook Inc.
Copyright (c) 2013, 2014 SkySQL Ab. All Rights Reserved.
Copyright (c) 2013, 2015 MariaDB Corporation. All Rights Reserved.
Portions of this file contain modifications contributed and copyrighted by
Google, Inc. Those modifications are gratefully acknowledged and are described
......@@ -4328,6 +4328,8 @@ innobase_kill_query(
enum thd_kill_levels level) /*!< in: kill level */
{
trx_t* trx;
bool took_lock_sys = false;
DBUG_ENTER("innobase_kill_query");
DBUG_ASSERT(hton == innodb_hton_ptr);
......@@ -4353,14 +4355,24 @@ innobase_kill_query(
/* Cancel a pending lock request. */
if (owner != cur) {
ut_ad(!lock_mutex_own());
lock_mutex_enter();
took_lock_sys = true;
}
ut_ad(!trx_mutex_own(trx));
trx_mutex_enter(trx);
if (trx->lock.wait_lock) {
lock_cancel_waiting_and_release(trx->lock.wait_lock);
}
ut_ad(lock_mutex_own());
ut_ad(trx_mutex_own(trx));
trx_mutex_exit(trx);
if (owner != cur) {
if (took_lock_sys) {
lock_mutex_exit();
}
}
......@@ -17320,6 +17332,7 @@ wsrep_innobase_kill_one_trx(void * const bf_thd_ptr,
WSREP_WARN("no THD for trx: %lu", victim_trx->id);
DBUG_RETURN(1);
}
if (!bf_thd) {
DBUG_PRINT("wsrep", ("no BF thd for conflicting lock"));
WSREP_WARN("no BF THD for trx: %lu", (bf_trx) ? bf_trx->id : 0);
......@@ -17343,6 +17356,7 @@ wsrep_innobase_kill_one_trx(void * const bf_thd_ptr,
wsrep_thd_UNLOCK(thd);
DBUG_RETURN(0);
}
if(wsrep_thd_exec_mode(thd) != LOCAL_STATE) {
WSREP_DEBUG("withdraw for BF trx: %lu, state: %d",
victim_trx->id,
......@@ -17350,7 +17364,7 @@ wsrep_innobase_kill_one_trx(void * const bf_thd_ptr,
}
switch (wsrep_thd_conflict_state(thd)) {
case NO_CONFLICT:
case NO_CONFLICT:
wsrep_thd_set_conflict_state(thd, MUST_ABORT);
break;
case MUST_ABORT:
......@@ -17472,6 +17486,7 @@ wsrep_innobase_kill_one_trx(void * const bf_thd_ptr,
mysql_mutex_lock(&LOCK_wsrep_rollback);
abortees = wsrep_aborting_thd;
while (abortees && !skip_abort) {
/* check if we have a kill message for this already */
if (abortees->aborting_thd == thd) {
......@@ -17481,6 +17496,7 @@ wsrep_innobase_kill_one_trx(void * const bf_thd_ptr,
}
abortees = abortees->next;
}
if (!skip_abort) {
wsrep_aborting_thd_t aborting = (wsrep_aborting_thd_t)
my_malloc(sizeof(struct wsrep_aborting_thd),
......@@ -17522,17 +17538,17 @@ wsrep_abort_transaction(handlerton* hton, THD *bf_thd, THD *victim_thd,
wsrep_thd_query(bf_thd),
wsrep_thd_query(victim_thd));
if (victim_trx)
{
lock_mutex_enter();
trx_mutex_enter(victim_trx);
if (victim_trx) {
victim_trx->current_lock_mutex_owner = victim_thd;
lock_mutex_enter();
trx_mutex_enter(victim_trx);
int rcode = wsrep_innobase_kill_one_trx(bf_thd, bf_trx,
victim_trx, signal);
trx_mutex_exit(victim_trx);
lock_mutex_exit();
trx_mutex_exit(victim_trx);
victim_trx->current_lock_mutex_owner = NULL;
lock_mutex_exit();
wsrep_srv_conc_cancel_wait(victim_trx);
DBUG_RETURN(rcode);
DBUG_RETURN(rcode);
} else {
WSREP_DEBUG("victim does not have transaction");
wsrep_thd_LOCK(victim_thd);
......@@ -17540,6 +17556,7 @@ wsrep_abort_transaction(handlerton* hton, THD *bf_thd, THD *victim_thd,
wsrep_thd_UNLOCK(victim_thd);
wsrep_thd_awake(victim_thd, signal);
}
DBUG_RETURN(-1);
}
......
......@@ -6767,14 +6767,16 @@ lock_clust_rec_modify_check_and_lock(
lock_rec_convert_impl_to_expl(block, rec, index, offsets);
lock_mutex_enter();
ut_ad(lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
trx_t* trx = thr_get_trx(thr);
trx->current_lock_mutex_owner = trx->mysql_thd;
ut_ad(lock_table_has(trx, index->table, LOCK_IX));
err = lock_rec_lock(TRUE, LOCK_X | LOCK_REC_NOT_GAP,
block, heap_no, index, thr);
MONITOR_INC(MONITOR_NUM_RECLOCK_REQ);
trx->current_lock_mutex_owner = NULL;
lock_mutex_exit();
ut_ad(lock_rec_queue_validate(FALSE, block, rec, index, offsets));
......@@ -6826,15 +6828,18 @@ lock_sec_rec_modify_check_and_lock(
index record, and this would not have been possible if another active
transaction had modified this secondary index record. */
trx_t* trx = thr_get_trx(thr);
lock_mutex_enter();
trx->current_lock_mutex_owner = trx->mysql_thd;
ut_ad(lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
ut_ad(lock_table_has(trx, index->table, LOCK_IX));
err = lock_rec_lock(TRUE, LOCK_X | LOCK_REC_NOT_GAP,
block, heap_no, index, thr);
MONITOR_INC(MONITOR_NUM_RECLOCK_REQ);
trx->current_lock_mutex_owner = NULL;
lock_mutex_exit();
#ifdef UNIV_DEBUG
......@@ -6925,18 +6930,21 @@ lock_sec_rec_read_check_and_lock(
lock_rec_convert_impl_to_expl(block, rec, index, offsets);
}
trx_t* trx = thr_get_trx(thr);
lock_mutex_enter();
trx->current_lock_mutex_owner = trx->mysql_thd;
ut_ad(mode != LOCK_X
|| lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
|| lock_table_has(trx, index->table, LOCK_IX));
ut_ad(mode != LOCK_S
|| lock_table_has(thr_get_trx(thr), index->table, LOCK_IS));
|| lock_table_has(trx, index->table, LOCK_IS));
err = lock_rec_lock(FALSE, mode | gap_mode,
block, heap_no, index, thr);
MONITOR_INC(MONITOR_NUM_RECLOCK_REQ);
trx->current_lock_mutex_owner = NULL;
lock_mutex_exit();
ut_ad(lock_rec_queue_validate(FALSE, block, rec, index, offsets));
......@@ -6998,17 +7006,20 @@ lock_clust_rec_read_check_and_lock(
}
lock_mutex_enter();
trx_t* trx = thr_get_trx(thr);
trx->current_lock_mutex_owner = trx->mysql_thd;
ut_ad(mode != LOCK_X
|| lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
|| lock_table_has(trx, index->table, LOCK_IX));
ut_ad(mode != LOCK_S
|| lock_table_has(thr_get_trx(thr), index->table, LOCK_IS));
|| lock_table_has(trx, index->table, LOCK_IS));
err = lock_rec_lock(FALSE, mode | gap_mode,
block, heap_no, index, thr);
MONITOR_INC(MONITOR_NUM_RECLOCK_REQ);
trx->current_lock_mutex_owner = NULL;
lock_mutex_exit();
ut_ad(lock_rec_queue_validate(FALSE, block, rec, index, offsets));
......
......@@ -4,7 +4,7 @@ Copyright (c) 2000, 2015, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2008, 2009 Google Inc.
Copyright (c) 2009, Percona Inc.
Copyright (c) 2012, Facebook Inc.
Copyright (c) 2013, 2014 SkySQL Ab. All Rights Reserved.
Copyright (c) 2013, 2015 MariaDB Corporation. All Rights Reserved.
Portions of this file contain modifications contributed and copyrighted by
Google, Inc. Those modifications are gratefully acknowledged and are described
......@@ -4930,6 +4930,7 @@ innobase_kill_connection(
thd_kill_levels)
{
trx_t* trx;
bool took_lock_sys = false;
DBUG_ENTER("innobase_kill_connection");
DBUG_ASSERT(hton == innodb_hton_ptr);
......@@ -4956,16 +4957,24 @@ innobase_kill_connection(
THD *owner = trx->current_lock_mutex_owner;
if (owner != cur) {
ut_ad(!lock_mutex_own());
lock_mutex_enter();
took_lock_sys = true;
}
ut_ad(!trx_mutex_own(trx));
trx_mutex_enter(trx);
/* Cancel a pending lock request. */
if (trx->lock.wait_lock)
lock_cancel_waiting_and_release(trx->lock.wait_lock);
ut_ad(lock_mutex_own());
ut_ad(trx_mutex_own(trx));
trx_mutex_exit(trx);
if (owner != cur) {
if (took_lock_sys) {
lock_mutex_exit();
}
}
......@@ -18333,6 +18342,7 @@ wsrep_innobase_kill_one_trx(void * const bf_thd_ptr,
WSREP_WARN("no THD for trx: %lu", victim_trx->id);
DBUG_RETURN(1);
}
if (!bf_thd) {
DBUG_PRINT("wsrep", ("no BF thd for conflicting lock"));
WSREP_WARN("no BF THD for trx: %lu", (bf_trx) ? bf_trx->id : 0);
......@@ -18356,6 +18366,7 @@ wsrep_innobase_kill_one_trx(void * const bf_thd_ptr,
wsrep_thd_UNLOCK(thd);
DBUG_RETURN(0);
}
if(wsrep_thd_exec_mode(thd) != LOCAL_STATE) {
WSREP_DEBUG("withdraw for BF trx: %lu, state: %d",
victim_trx->id,
......@@ -18363,7 +18374,7 @@ wsrep_innobase_kill_one_trx(void * const bf_thd_ptr,
}
switch (wsrep_thd_conflict_state(thd)) {
case NO_CONFLICT:
case NO_CONFLICT:
wsrep_thd_set_conflict_state(thd, MUST_ABORT);
break;
case MUST_ABORT:
......@@ -18485,6 +18496,7 @@ wsrep_innobase_kill_one_trx(void * const bf_thd_ptr,
mysql_mutex_lock(&LOCK_wsrep_rollback);
abortees = wsrep_aborting_thd;
while (abortees && !skip_abort) {
/* check if we have a kill message for this already */
if (abortees->aborting_thd == thd) {
......@@ -18494,6 +18506,7 @@ wsrep_innobase_kill_one_trx(void * const bf_thd_ptr,
}
abortees = abortees->next;
}
if (!skip_abort) {
wsrep_aborting_thd_t aborting = (wsrep_aborting_thd_t)
my_malloc(sizeof(struct wsrep_aborting_thd),
......@@ -18524,7 +18537,7 @@ wsrep_innobase_kill_one_trx(void * const bf_thd_ptr,
DBUG_RETURN(0);
}
static int
static int
wsrep_abort_transaction(handlerton* hton, THD *bf_thd, THD *victim_thd,
my_bool signal)
{
......@@ -18535,17 +18548,17 @@ wsrep_abort_transaction(handlerton* hton, THD *bf_thd, THD *victim_thd,
wsrep_thd_query(bf_thd),
wsrep_thd_query(victim_thd));
if (victim_trx)
{
lock_mutex_enter();
trx_mutex_enter(victim_trx);
if (victim_trx) {
victim_trx->current_lock_mutex_owner = victim_thd;
lock_mutex_enter();
trx_mutex_enter(victim_trx);
int rcode = wsrep_innobase_kill_one_trx(bf_thd, bf_trx,
victim_trx, signal);
trx_mutex_exit(victim_trx);
lock_mutex_exit();
trx_mutex_exit(victim_trx);
victim_trx->current_lock_mutex_owner = NULL;
lock_mutex_exit();
wsrep_srv_conc_cancel_wait(victim_trx);
DBUG_RETURN(rcode);
DBUG_RETURN(rcode);
} else {
WSREP_DEBUG("victim does not have transaction");
wsrep_thd_LOCK(victim_thd);
......@@ -18553,6 +18566,7 @@ wsrep_abort_transaction(handlerton* hton, THD *bf_thd, THD *victim_thd,
wsrep_thd_UNLOCK(victim_thd);
wsrep_thd_awake(victim_thd, signal);
}
DBUG_RETURN(-1);
}
......
......@@ -6835,14 +6835,16 @@ lock_clust_rec_modify_check_and_lock(
lock_rec_convert_impl_to_expl(block, rec, index, offsets);
lock_mutex_enter();
ut_ad(lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
trx_t* trx = thr_get_trx(thr);
trx->current_lock_mutex_owner = trx->mysql_thd;
ut_ad(lock_table_has(trx, index->table, LOCK_IX));
err = lock_rec_lock(TRUE, LOCK_X | LOCK_REC_NOT_GAP,
block, heap_no, index, thr);
MONITOR_INC(MONITOR_NUM_RECLOCK_REQ);
trx->current_lock_mutex_owner = NULL;
lock_mutex_exit();
ut_ad(lock_rec_queue_validate(FALSE, block, rec, index, offsets));
......@@ -6898,15 +6900,18 @@ lock_sec_rec_modify_check_and_lock(
index record, and this would not have been possible if another active
transaction had modified this secondary index record. */
trx_t* trx = thr_get_trx(thr);
lock_mutex_enter();
trx->current_lock_mutex_owner = trx->mysql_thd;
ut_ad(lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
ut_ad(lock_table_has(trx, index->table, LOCK_IX));
err = lock_rec_lock(TRUE, LOCK_X | LOCK_REC_NOT_GAP,
block, heap_no, index, thr);
MONITOR_INC(MONITOR_NUM_RECLOCK_REQ);
trx->current_lock_mutex_owner = NULL;
lock_mutex_exit();
#ifdef UNIV_DEBUG
......@@ -7006,18 +7011,21 @@ lock_sec_rec_read_check_and_lock(
lock_rec_convert_impl_to_expl(block, rec, index, offsets);
}
trx_t* trx = thr_get_trx(thr);
lock_mutex_enter();
trx->current_lock_mutex_owner = trx->mysql_thd;
ut_ad(mode != LOCK_X
|| lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
|| lock_table_has(trx, index->table, LOCK_IX));
ut_ad(mode != LOCK_S
|| lock_table_has(thr_get_trx(thr), index->table, LOCK_IS));
|| lock_table_has(trx, index->table, LOCK_IS));
err = lock_rec_lock(FALSE, mode | gap_mode,
block, heap_no, index, thr);
MONITOR_INC(MONITOR_NUM_RECLOCK_REQ);
trx->current_lock_mutex_owner = NULL;
lock_mutex_exit();
ut_ad(lock_rec_queue_validate(FALSE, block, rec, index, offsets));
......@@ -7088,17 +7096,20 @@ lock_clust_rec_read_check_and_lock(
}
lock_mutex_enter();
trx_t* trx = thr_get_trx(thr);
trx->current_lock_mutex_owner = trx->mysql_thd;
ut_ad(mode != LOCK_X
|| lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
|| lock_table_has(trx, index->table, LOCK_IX));
ut_ad(mode != LOCK_S
|| lock_table_has(thr_get_trx(thr), index->table, LOCK_IS));
|| lock_table_has(trx, index->table, LOCK_IS));
err = lock_rec_lock(FALSE, mode | gap_mode,
block, heap_no, index, thr);
MONITOR_INC(MONITOR_NUM_RECLOCK_REQ);
trx->current_lock_mutex_owner = NULL;
lock_mutex_exit();
ut_ad(lock_rec_queue_validate(FALSE, block, rec, index, offsets));
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment