Commit ac351578 authored by Dmitry Lenev's avatar Dmitry Lenev

A temporary workaround for bug #56405 "Deadlock in the

MDL deadlock detector".

Deadlock could have occurred when workload containing mix
of DML, DDL and FLUSH TABLES statements affecting same
set of tables was executed in heavily concurrent environment.

This deadlock occurred when several connections tried to
perform deadlock detection in metadata locking subsystem.
The first connection started traversing wait-for graph,
encountered sub-graph representing wait for flush, acquired
LOCK_open and dived into sub-graph inspection. When it has
encounterd sub-graph corresponding to wait for metadata lock
and blocked while trying to acquire rd-lock on
MDL_lock::m_rwlock (*) protecting this subgraph, since some
other thread had wr-lock on it. When this wr-lock was released
it could have happened (if there was other pending wr-lock
against this rwlock) that rd-lock from the first connection
was left unsatisfied but at the same time new rd-lock request
from the second connection sneaked in and was satisfied (for
this to be possible second rd- request should come exactly
after wr-lock is released but before pending wr-lock manages
to grab rwlock, which is possible both on Linux and in our
own rwlock implementation). If this second connection
continued traversing wait-for graph and encountered sub-graph
representing wait for flush it tried to acquire LOCK_open
and thus deadlock was created.

This patch tries to workaround this problem but not allowing
deadlock detector to lock LOCK_open mutex if some other thread
doing deadlock detection already owns it and current search
depth is greater than 0. Instead deadlock is reported.

Other possible solutions are either known to have negative
effects on performance or require much more time for proper
implementation and testing.

No test case is provided as this bug is very hard to repeat
in MTR environment but is repeatable with the help of RQG
tests.
parent 8fd74f66
...@@ -124,7 +124,6 @@ public: ...@@ -124,7 +124,6 @@ public:
Deadlock_detection_visitor(MDL_context *start_node_arg) Deadlock_detection_visitor(MDL_context *start_node_arg)
: m_start_node(start_node_arg), : m_start_node(start_node_arg),
m_victim(NULL), m_victim(NULL),
m_current_search_depth(0),
m_found_deadlock(FALSE) m_found_deadlock(FALSE)
{} {}
virtual bool enter_node(MDL_context *node); virtual bool enter_node(MDL_context *node);
...@@ -133,6 +132,8 @@ public: ...@@ -133,6 +132,8 @@ public:
virtual bool inspect_edge(MDL_context *dest); virtual bool inspect_edge(MDL_context *dest);
MDL_context *get_victim() const { return m_victim; } MDL_context *get_victim() const { return m_victim; }
void abort_traversal(MDL_context *node);
private: private:
/** /**
Change the deadlock victim to a new one if it has lower deadlock Change the deadlock victim to a new one if it has lower deadlock
...@@ -147,13 +148,6 @@ private: ...@@ -147,13 +148,6 @@ private:
MDL_context *m_start_node; MDL_context *m_start_node;
/** If a deadlock is found, the context that identifies the victim. */ /** If a deadlock is found, the context that identifies the victim. */
MDL_context *m_victim; MDL_context *m_victim;
/** Set to the 0 at start. Increased whenever
we descend into another MDL context (aka traverse to the next
wait-for graph node). When MAX_SEARCH_DEPTH is reached, we
assume that a deadlock is found, even if we have not found a
loop.
*/
uint m_current_search_depth;
/** TRUE if we found a deadlock. */ /** TRUE if we found a deadlock. */
bool m_found_deadlock; bool m_found_deadlock;
/** /**
...@@ -187,7 +181,7 @@ private: ...@@ -187,7 +181,7 @@ private:
bool Deadlock_detection_visitor::enter_node(MDL_context *node) bool Deadlock_detection_visitor::enter_node(MDL_context *node)
{ {
m_found_deadlock= ++m_current_search_depth >= MAX_SEARCH_DEPTH; m_found_deadlock= m_current_search_depth >= MAX_SEARCH_DEPTH;
if (m_found_deadlock) if (m_found_deadlock)
{ {
DBUG_ASSERT(! m_victim); DBUG_ASSERT(! m_victim);
...@@ -207,7 +201,6 @@ bool Deadlock_detection_visitor::enter_node(MDL_context *node) ...@@ -207,7 +201,6 @@ bool Deadlock_detection_visitor::enter_node(MDL_context *node)
void Deadlock_detection_visitor::leave_node(MDL_context *node) void Deadlock_detection_visitor::leave_node(MDL_context *node)
{ {
--m_current_search_depth;
if (m_found_deadlock) if (m_found_deadlock)
opt_change_victim_to(node); opt_change_victim_to(node);
} }
...@@ -251,6 +244,21 @@ Deadlock_detection_visitor::opt_change_victim_to(MDL_context *new_victim) ...@@ -251,6 +244,21 @@ Deadlock_detection_visitor::opt_change_victim_to(MDL_context *new_victim)
} }
/**
Abort traversal of a wait-for graph and report a deadlock.
@param node Node which we were about to visit when abort
was initiated.
*/
void Deadlock_detection_visitor::abort_traversal(MDL_context *node)
{
DBUG_ASSERT(! m_victim);
m_found_deadlock= TRUE;
opt_change_victim_to(node);
}
/** /**
Get a bit corresponding to enum_mdl_type value in a granted/waiting bitmaps Get a bit corresponding to enum_mdl_type value in a granted/waiting bitmaps
and compatibility matrices. and compatibility matrices.
...@@ -2056,8 +2064,13 @@ bool MDL_lock::visit_subgraph(MDL_ticket *waiting_ticket, ...@@ -2056,8 +2064,13 @@ bool MDL_lock::visit_subgraph(MDL_ticket *waiting_ticket,
are visiting it but this is OK: in the worst case we might do some are visiting it but this is OK: in the worst case we might do some
extra work and one more context might be chosen as a victim. extra work and one more context might be chosen as a victim.
*/ */
++gvisitor->m_current_search_depth;
if (gvisitor->enter_node(src_ctx)) if (gvisitor->enter_node(src_ctx))
{
--gvisitor->m_current_search_depth;
goto end; goto end;
}
/* /*
We do a breadth-first search first -- that is, inspect all We do a breadth-first search first -- that is, inspect all
...@@ -2114,6 +2127,7 @@ bool MDL_lock::visit_subgraph(MDL_ticket *waiting_ticket, ...@@ -2114,6 +2127,7 @@ bool MDL_lock::visit_subgraph(MDL_ticket *waiting_ticket,
end_leave_node: end_leave_node:
gvisitor->leave_node(src_ctx); gvisitor->leave_node(src_ctx);
--gvisitor->m_current_search_depth;
end: end:
mysql_prlock_unlock(&m_rwlock); mysql_prlock_unlock(&m_rwlock);
......
...@@ -385,7 +385,10 @@ public: ...@@ -385,7 +385,10 @@ public:
virtual bool inspect_edge(MDL_context *dest) = 0; virtual bool inspect_edge(MDL_context *dest) = 0;
virtual ~MDL_wait_for_graph_visitor(); virtual ~MDL_wait_for_graph_visitor();
MDL_wait_for_graph_visitor() :m_lock_open_count(0) {} MDL_wait_for_graph_visitor() :m_lock_open_count(0),
m_current_search_depth(0)
{ }
virtual void abort_traversal(MDL_context *node) = 0;
public: public:
/** /**
XXX, hack: During deadlock search, we may need to XXX, hack: During deadlock search, we may need to
...@@ -396,6 +399,17 @@ public: ...@@ -396,6 +399,17 @@ public:
LOCK_open since it has significant performance impacts. LOCK_open since it has significant performance impacts.
*/ */
uint m_lock_open_count; uint m_lock_open_count;
/**
Set to the 0 at start. Increased whenever
we descend into another MDL context (aka traverse to the next
wait-for graph node). When MAX_SEARCH_DEPTH is reached, we
assume that a deadlock is found, even if we have not found a
loop.
XXX: This member belongs to this class only temporarily until
bug #56405 is fixed.
*/
uint m_current_search_depth;
}; };
/** /**
......
...@@ -100,6 +100,8 @@ bool No_such_table_error_handler::safely_trapped_errors() ...@@ -100,6 +100,8 @@ bool No_such_table_error_handler::safely_trapped_errors()
TABLE_SHAREs, refresh_version and the table id counter. TABLE_SHAREs, refresh_version and the table id counter.
*/ */
mysql_mutex_t LOCK_open; mysql_mutex_t LOCK_open;
mysql_mutex_t LOCK_dd_owns_lock_open;
uint dd_owns_lock_open= 0;
#ifdef HAVE_PSI_INTERFACE #ifdef HAVE_PSI_INTERFACE
static PSI_mutex_key key_LOCK_open; static PSI_mutex_key key_LOCK_open;
...@@ -298,6 +300,7 @@ bool table_def_init(void) ...@@ -298,6 +300,7 @@ bool table_def_init(void)
init_tdc_psi_keys(); init_tdc_psi_keys();
#endif #endif
mysql_mutex_init(key_LOCK_open, &LOCK_open, MY_MUTEX_INIT_FAST); mysql_mutex_init(key_LOCK_open, &LOCK_open, MY_MUTEX_INIT_FAST);
mysql_mutex_init(NULL, &LOCK_dd_owns_lock_open, MY_MUTEX_INIT_FAST);
oldest_unused_share= &end_of_unused_share; oldest_unused_share= &end_of_unused_share;
end_of_unused_share.prev= &oldest_unused_share; end_of_unused_share.prev= &oldest_unused_share;
...@@ -341,6 +344,7 @@ void table_def_free(void) ...@@ -341,6 +344,7 @@ void table_def_free(void)
table_def_inited= 0; table_def_inited= 0;
/* Free table definitions. */ /* Free table definitions. */
my_hash_free(&table_def_cache); my_hash_free(&table_def_cache);
mysql_mutex_destroy(&LOCK_dd_owns_lock_open);
mysql_mutex_destroy(&LOCK_open); mysql_mutex_destroy(&LOCK_open);
} }
DBUG_VOID_RETURN; DBUG_VOID_RETURN;
......
...@@ -71,6 +71,8 @@ enum enum_tdc_remove_table_type {TDC_RT_REMOVE_ALL, TDC_RT_REMOVE_NOT_OWN, ...@@ -71,6 +71,8 @@ enum enum_tdc_remove_table_type {TDC_RT_REMOVE_ALL, TDC_RT_REMOVE_NOT_OWN,
bool check_dup(const char *db, const char *name, TABLE_LIST *tables); bool check_dup(const char *db, const char *name, TABLE_LIST *tables);
extern mysql_mutex_t LOCK_open; extern mysql_mutex_t LOCK_open;
extern mysql_mutex_t LOCK_dd_owns_lock_open;
extern uint dd_owns_lock_open;
bool table_cache_init(void); bool table_cache_init(void);
void table_cache_free(void); void table_cache_free(void);
bool table_def_init(void); bool table_def_init(void);
......
...@@ -3085,7 +3085,30 @@ bool TABLE_SHARE::visit_subgraph(Wait_for_flush *wait_for_flush, ...@@ -3085,7 +3085,30 @@ bool TABLE_SHARE::visit_subgraph(Wait_for_flush *wait_for_flush,
holding a write-lock on MDL_lock::m_rwlock. holding a write-lock on MDL_lock::m_rwlock.
*/ */
if (gvisitor->m_lock_open_count++ == 0) if (gvisitor->m_lock_open_count++ == 0)
{
/*
To circumvent bug #56405 "Deadlock in the MDL deadlock detector"
we don't try to lock LOCK_open mutex if some thread doing
deadlock detection already owns it and current search depth is
greater than 0. Instead we report a deadlock.
TODO/FIXME: The proper fix for this bug is to use rwlocks for
protection of table shares/instead of LOCK_open.
Unfortunately it requires more effort/has significant
performance effect.
*/
mysql_mutex_lock(&LOCK_dd_owns_lock_open);
if (gvisitor->m_current_search_depth > 0 && dd_owns_lock_open > 0)
{
mysql_mutex_unlock(&LOCK_dd_owns_lock_open);
--gvisitor->m_lock_open_count;
gvisitor->abort_traversal(src_ctx);
return TRUE;
}
++dd_owns_lock_open;
mysql_mutex_unlock(&LOCK_dd_owns_lock_open);
mysql_mutex_lock(&LOCK_open); mysql_mutex_lock(&LOCK_open);
}
I_P_List_iterator <TABLE, TABLE_share> tables_it(used_tables); I_P_List_iterator <TABLE, TABLE_share> tables_it(used_tables);
...@@ -3100,8 +3123,12 @@ bool TABLE_SHARE::visit_subgraph(Wait_for_flush *wait_for_flush, ...@@ -3100,8 +3123,12 @@ bool TABLE_SHARE::visit_subgraph(Wait_for_flush *wait_for_flush,
goto end; goto end;
} }
++gvisitor->m_current_search_depth;
if (gvisitor->enter_node(src_ctx)) if (gvisitor->enter_node(src_ctx))
{
--gvisitor->m_current_search_depth;
goto end; goto end;
}
while ((table= tables_it++)) while ((table= tables_it++))
{ {
...@@ -3124,10 +3151,16 @@ bool TABLE_SHARE::visit_subgraph(Wait_for_flush *wait_for_flush, ...@@ -3124,10 +3151,16 @@ bool TABLE_SHARE::visit_subgraph(Wait_for_flush *wait_for_flush,
end_leave_node: end_leave_node:
gvisitor->leave_node(src_ctx); gvisitor->leave_node(src_ctx);
--gvisitor->m_current_search_depth;
end: end:
if (gvisitor->m_lock_open_count-- == 1) if (gvisitor->m_lock_open_count-- == 1)
{
mysql_mutex_unlock(&LOCK_open); mysql_mutex_unlock(&LOCK_open);
mysql_mutex_lock(&LOCK_dd_owns_lock_open);
--dd_owns_lock_open;
mysql_mutex_unlock(&LOCK_dd_owns_lock_open);
}
return result; return result;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment