Commit 40a44873 authored by Kent Overstreet

bcachefs: Improve btree_deadlock debugfs output

This changes bch2_check_for_deadlock() to print the longest chains it
finds - when we have a deadlock because the cycle detector isn't finding
something, this will let us see what it's missing.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent 943f9946
...@@ -71,11 +71,6 @@ struct lock_graph { ...@@ -71,11 +71,6 @@ struct lock_graph {
unsigned nr; unsigned nr;
}; };
/*
 * Remove the last entry from the lock graph: g->nr is decremented and
 * closure_put() is called on the ref of the transaction stored in that slot.
 *
 * No check for g->nr == 0 is made here.
 */
static void lock_graph_pop(struct lock_graph *g)
{
closure_put(&g->g[--g->nr].trans->ref);
}
static noinline void print_cycle(struct printbuf *out, struct lock_graph *g) static noinline void print_cycle(struct printbuf *out, struct lock_graph *g)
{ {
struct trans_waiting_for_lock *i; struct trans_waiting_for_lock *i;
...@@ -87,6 +82,18 @@ static noinline void print_cycle(struct printbuf *out, struct lock_graph *g) ...@@ -87,6 +82,18 @@ static noinline void print_cycle(struct printbuf *out, struct lock_graph *g)
bch2_btree_trans_to_text(out, i->trans); bch2_btree_trans_to_text(out, i->trans);
} }
/*
 * Print the current lock graph as a single line of pids.
 *
 * Walks the entries from g->g[0] up to (not including) g->g[g->nr] and emits
 * the pid of each entry's trans->locking_wait.task, with "<- " written before
 * every entry except the first, then terminates the line.
 *
 * @out:	printbuf that receives the text
 * @g:		lock graph to print
 */
static noinline void print_chain(struct printbuf *out, struct lock_graph *g)
{
struct trans_waiting_for_lock *i;
for (i = g->g; i != g->g + g->nr; i++) {
/* separator goes between entries, not before the first one */
if (i != g->g)
prt_str(out, "<- ");
prt_printf(out, "%u ", i->trans->locking_wait.task->pid);
}
prt_newline(out);
}
static int abort_lock(struct lock_graph *g, struct trans_waiting_for_lock *i) static int abort_lock(struct lock_graph *g, struct trans_waiting_for_lock *i)
{ {
int ret; int ret;
...@@ -134,6 +141,21 @@ static noinline int break_cycle(struct lock_graph *g) ...@@ -134,6 +141,21 @@ static noinline int break_cycle(struct lock_graph *g)
BUG(); BUG();
} }
/*
 * Remove the last entry from the lock graph: g->nr is decremented and
 * closure_put() is called on the ref of the transaction stored in that slot.
 *
 * No check for g->nr == 0 is made here.
 */
static void lock_graph_pop(struct lock_graph *g)
{
closure_put(&g->g[--g->nr].trans->ref);
}
/*
 * Pop entries off the lock graph until none remain past @above.
 *
 * Before anything is popped, if @cycle is non-NULL and the graph holds more
 * than one entry, the chain as it currently stands is printed to @cycle via
 * print_chain().
 *
 * @g:		lock graph to shrink
 * @above:	entries are popped while g->g + g->nr is greater than this pointer
 * @cycle:	optional printbuf for the chain output; may be NULL
 */
static void lock_graph_pop_above(struct lock_graph *g, struct trans_waiting_for_lock *above,
struct printbuf *cycle)
{
/* print first: popping below would discard the entries we want to show */
if (g->nr > 1 && cycle)
print_chain(cycle, g);
while (g->g + g->nr > above)
lock_graph_pop(g);
}
static int lock_graph_descend(struct lock_graph *g, struct btree_trans *trans, static int lock_graph_descend(struct lock_graph *g, struct btree_trans *trans,
struct printbuf *cycle) struct printbuf *cycle)
{ {
...@@ -142,9 +164,8 @@ static int lock_graph_descend(struct lock_graph *g, struct btree_trans *trans, ...@@ -142,9 +164,8 @@ static int lock_graph_descend(struct lock_graph *g, struct btree_trans *trans,
int ret = 0; int ret = 0;
for (i = g->g; i < g->g + g->nr; i++) { for (i = g->g; i < g->g + g->nr; i++) {
if (i->trans->locking != i->node_want) if (i->trans->locking != i->node_want) {
while (g->g + g->nr >= i) { lock_graph_pop_above(g, i - 1, cycle);
lock_graph_pop(g);
return 0; return 0;
} }
...@@ -185,20 +206,19 @@ static int lock_graph_descend(struct lock_graph *g, struct btree_trans *trans, ...@@ -185,20 +206,19 @@ static int lock_graph_descend(struct lock_graph *g, struct btree_trans *trans,
return 0; return 0;
deadlock: deadlock:
while (g->nr) lock_graph_pop_above(g, g->g, cycle);
lock_graph_pop(g);
return ret; return ret;
} }
static noinline void lock_graph_remove_non_waiters(struct lock_graph *g) static noinline void lock_graph_remove_non_waiters(struct lock_graph *g,
struct printbuf *cycle)
{ {
struct trans_waiting_for_lock *i; struct trans_waiting_for_lock *i;
for (i = g->g + 1; i < g->g + g->nr; i++) for (i = g->g + 1; i < g->g + g->nr; i++)
if (i->trans->locking != i->node_want || if (i->trans->locking != i->node_want ||
i->trans->locking_wait.start_time != i[-1].lock_start_time) { i->trans->locking_wait.start_time != i[-1].lock_start_time) {
while (g->g + g->nr >= i) lock_graph_pop_above(g, i - 1, cycle);
lock_graph_pop(g);
return; return;
} }
BUG(); BUG();
...@@ -252,7 +272,7 @@ int bch2_check_for_deadlock(struct btree_trans *trans, struct printbuf *cycle) ...@@ -252,7 +272,7 @@ int bch2_check_for_deadlock(struct btree_trans *trans, struct printbuf *cycle)
b = &READ_ONCE(path->l[top->level].b)->c; b = &READ_ONCE(path->l[top->level].b)->c;
if (unlikely(IS_ERR_OR_NULL(b))) { if (unlikely(IS_ERR_OR_NULL(b))) {
lock_graph_remove_non_waiters(&g); lock_graph_remove_non_waiters(&g, cycle);
goto next; goto next;
} }
...@@ -286,6 +306,8 @@ int bch2_check_for_deadlock(struct btree_trans *trans, struct printbuf *cycle) ...@@ -286,6 +306,8 @@ int bch2_check_for_deadlock(struct btree_trans *trans, struct printbuf *cycle)
} }
} }
if (g.nr > 1 && cycle)
print_chain(cycle, &g);
lock_graph_pop(&g); lock_graph_pop(&g);
goto next; goto next;
} }
......
...@@ -725,10 +725,17 @@ static ssize_t bch2_btree_deadlock_read(struct file *file, char __user *buf, ...@@ -725,10 +725,17 @@ static ssize_t bch2_btree_deadlock_read(struct file *file, char __user *buf,
goto out; goto out;
mutex_lock(&c->btree_trans_lock); mutex_lock(&c->btree_trans_lock);
list_for_each_entry(trans, &c->btree_trans_list, list) list_for_each_entry(trans, &c->btree_trans_list, list) {
if (bch2_check_for_deadlock(trans, &i->buf)) { if (trans->locking_wait.task->pid <= i->iter)
i->iter = 1; continue;
break;
ret = flush_buf(i);
if (ret)
return ret;
bch2_check_for_deadlock(trans, &i->buf);
i->iter = trans->locking_wait.task->pid;
} }
mutex_unlock(&c->btree_trans_lock); mutex_unlock(&c->btree_trans_lock);
out: out:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment