Commit a301dc38 authored by Kent Overstreet, committed by Kent Overstreet

bcachefs: Improve tracing for transaction restarts

We have a bug where a process can get stuck spinning in transaction
restarts - we need more information to track down where those restarts
are coming from.
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent 527087c7
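
Most of the patch below is mechanical plumbing of an unsigned long trace_ip
argument down the traversal path, using the kernel's _THIS_IP_ (the address
of the current code location) and _RET_IP_ (the caller's return address)
macros. A minimal sketch of the pattern, with hypothetical function names
(leaf_op() etc. are not bcachefs API):

	#include <linux/kernel.h>	/* _THIS_IP_, _RET_IP_ */

	/* hypothetical leaf: on failure, report where the restart originated */
	static int leaf_op(unsigned long trace_ip)
	{
		trace_printk("restart triggered from %pS\n", (void *) trace_ip);
		return -EINTR;
	}

	/* internal call site: record this exact code location */
	static int mid_op(void)
	{
		return leaf_op(_THIS_IP_);
	}

	/* exported entry point: record the external caller's location instead */
	int entry_op(void)
	{
		return leaf_op(_RET_IP_);
	}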
@@ -706,7 +706,8 @@ static int lock_node_check_fn(struct six_lock *lock, void *p)
  */
 struct btree *bch2_btree_node_get(struct bch_fs *c, struct btree_iter *iter,
 				  const struct bkey_i *k, unsigned level,
-				  enum six_lock_type lock_type)
+				  enum six_lock_type lock_type,
+				  unsigned long trace_ip)
 {
 	struct btree_cache *bc = &c->btree_cache;
 	struct btree *b;
@@ -768,7 +769,7 @@ struct btree *bch2_btree_node_get(struct bch_fs *c, struct btree_iter *iter,
 			btree_node_unlock(iter, level + 1);
 
 		if (!btree_node_lock(b, k->k.p, level, iter, lock_type,
-				     lock_node_check_fn, (void *) k)) {
+				     lock_node_check_fn, (void *) k, trace_ip)) {
 			if (b->hash_val != btree_ptr_hash_val(k))
 				goto retry;
 			return ERR_PTR(-EINTR);
@@ -936,7 +937,7 @@ struct btree *bch2_btree_node_get_sibling(struct bch_fs *c,
 		bch2_bkey_unpack(parent, &tmp.k, k);
 
 		ret = bch2_btree_node_get(c, iter, &tmp.k, level,
-					  SIX_LOCK_intent);
+					  SIX_LOCK_intent, _THIS_IP_);
 		if (PTR_ERR_OR_ZERO(ret) == -EINTR && !trans->nounlock) {
 			struct btree_iter *linked;
@@ -956,7 +957,7 @@ struct btree *bch2_btree_node_get_sibling(struct bch_fs *c,
 			btree_node_unlock(iter, level);
 
 			ret = bch2_btree_node_get(c, iter, &tmp.k, level,
-						  SIX_LOCK_intent);
+						  SIX_LOCK_intent, _THIS_IP_);
 			/*
 			 * before btree_iter_relock() calls btree_iter_verify_locks():
...
@@ -23,7 +23,7 @@ struct btree *bch2_btree_node_mem_alloc(struct bch_fs *);
 struct btree *bch2_btree_node_get(struct bch_fs *, struct btree_iter *,
 				  const struct bkey_i *, unsigned,
-				  enum six_lock_type);
+				  enum six_lock_type, unsigned long);
 
 struct btree *bch2_btree_node_get_noiter(struct bch_fs *, const struct bkey_i *,
 					 enum btree_id, unsigned);
...
@@ -197,13 +197,13 @@ static struct bpos btree_node_pos(struct btree_bkey_cached_common *_b,
 bool __bch2_btree_node_lock(struct btree *b, struct bpos pos,
 			    unsigned level, struct btree_iter *iter,
 			    enum six_lock_type type,
-			    six_lock_should_sleep_fn should_sleep_fn,
-			    void *p)
+			    six_lock_should_sleep_fn should_sleep_fn, void *p,
+			    unsigned long ip)
 {
 	struct btree_trans *trans = iter->trans;
-	struct btree_iter *linked;
+	struct btree_iter *linked, *deadlock_iter = NULL;
 	u64 start_time = local_clock();
-	bool ret = true;
+	unsigned reason = 9;
 
 	/* Check if it's safe to block: */
 	trans_for_each_iter(trans, linked) {
@@ -228,10 +228,13 @@ bool __bch2_btree_node_lock(struct btree *b, struct bpos pos,
 			linked->locks_want = max_t(unsigned,
 						   linked->locks_want,
 						   __fls(linked->nodes_locked) + 1);
-			if (!btree_iter_get_locks(linked, true, false))
-				ret = false;
+			if (!btree_iter_get_locks(linked, true, false)) {
+				deadlock_iter = linked;
+				reason = 1;
+			}
 		} else {
-			ret = false;
+			deadlock_iter = linked;
+			reason = 2;
 		}
 	}
@@ -247,23 +250,30 @@ bool __bch2_btree_node_lock(struct btree *b, struct bpos pos,
 				max(level + 1, max_t(unsigned,
 						     linked->locks_want,
 						     iter->locks_want));
-			if (!btree_iter_get_locks(linked, true, false))
-				ret = false;
+			if (!btree_iter_get_locks(linked, true, false)) {
+				deadlock_iter = linked;
+				reason = 3;
+			}
 		} else {
-			ret = false;
+			deadlock_iter = linked;
+			reason = 4;
 		}
 	}
 
 		/* Must lock btree nodes in key order: */
 		if ((cmp_int(iter->btree_id, linked->btree_id) ?:
-		     -cmp_int(btree_iter_type(iter), btree_iter_type(linked))) < 0)
-			ret = false;
+		     -cmp_int(btree_iter_type(iter), btree_iter_type(linked))) < 0) {
+			deadlock_iter = linked;
+			reason = 5;
+		}
 
 		if (iter->btree_id == linked->btree_id &&
 		    btree_node_locked(linked, level) &&
 		    bkey_cmp(pos, btree_node_pos((void *) linked->l[level].b,
-						 btree_iter_type(linked))) <= 0)
-			ret = false;
+						 btree_iter_type(linked))) <= 0) {
+			deadlock_iter = linked;
+			reason = 6;
+		}
 
 		/*
 		 * Recheck if this is a node we already have locked - since one
@@ -277,8 +287,13 @@ bool __bch2_btree_node_lock(struct btree *b, struct bpos pos,
 		}
 	}
 
-	if (unlikely(!ret)) {
-		trace_trans_restart_would_deadlock(iter->trans->ip);
+	if (unlikely(deadlock_iter)) {
+		trace_trans_restart_would_deadlock(iter->trans->ip, ip,
+				reason,
+				deadlock_iter->btree_id,
+				btree_iter_type(deadlock_iter),
+				iter->btree_id,
+				btree_iter_type(iter));
 		return false;
 	}
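
The bare numbers assigned to reason above encode which check failed. A
hypothetical key, with meanings inferred from the branches in
__bch2_btree_node_lock() (the patch itself passes plain integers, so this
enum is illustrative only):

	/*
	 * Illustrative only -- not part of the patch. Meanings inferred from
	 * the branches above; 9 is the initial "no deadlock" value and is
	 * never traced.
	 */
	enum would_deadlock_reason {
		WD_LINKED_RELOCK_FAILED	= 1, /* raised linked->locks_want, but relock failed */
		WD_LINKED_CANT_UPGRADE	= 2, /* linked iter's locks couldn't be upgraded */
		WD_SELF_RELOCK_FAILED	= 3, /* raised locks_want for this lock, but relock failed */
		WD_SELF_CANT_UPGRADE	= 4, /* upgrade for this lock not possible */
		WD_BTREE_LOCK_ORDER	= 5, /* btrees must be locked in btree_id/iter-type order */
		WD_KEY_ORDER		= 6, /* nodes within a btree must be locked in key order */
		WD_NONE			= 9, /* initial value */
	};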
@@ -945,7 +960,8 @@ static int lock_root_check_fn(struct six_lock *lock, void *p)
 }
 
 static inline int btree_iter_lock_root(struct btree_iter *iter,
-				       unsigned depth_want)
+				       unsigned depth_want,
+				       unsigned long trace_ip)
 {
 	struct bch_fs *c = iter->trans->c;
 	struct btree *b, **rootp = &c->btree_roots[iter->btree_id].b;
@@ -974,7 +990,8 @@ static inline int btree_iter_lock_root(struct btree_iter *iter,
 		lock_type = __btree_lock_want(iter, iter->level);
 		if (unlikely(!btree_node_lock(b, POS_MAX, iter->level,
 					      iter, lock_type,
-					      lock_root_check_fn, rootp)))
+					      lock_root_check_fn, rootp,
+					      trace_ip)))
 			return -EINTR;
 
 		if (likely(b == READ_ONCE(*rootp) &&
@@ -1046,7 +1063,8 @@ static noinline void btree_node_mem_ptr_set(struct btree_iter *iter,
 		btree_node_unlock(iter, plevel);
 }
 
-static __always_inline int btree_iter_down(struct btree_iter *iter)
+static __always_inline int btree_iter_down(struct btree_iter *iter,
+					   unsigned long trace_ip)
 {
 	struct bch_fs *c = iter->trans->c;
 	struct btree_iter_level *l = &iter->l[iter->level];
@@ -1060,7 +1078,7 @@ static __always_inline int btree_iter_down(struct btree_iter *iter)
 	bch2_bkey_unpack(l->b, &tmp.k,
 			 bch2_btree_node_iter_peek(&l->iter, l->b));
 
-	b = bch2_btree_node_get(c, iter, &tmp.k, level, lock_type);
+	b = bch2_btree_node_get(c, iter, &tmp.k, level, lock_type, trace_ip);
 	if (unlikely(IS_ERR(b)))
 		return PTR_ERR(b);
@@ -1084,7 +1102,7 @@ static void btree_iter_up(struct btree_iter *iter)
 	btree_node_unlock(iter, iter->level++);
 }
 
-static int btree_iter_traverse_one(struct btree_iter *);
+static int btree_iter_traverse_one(struct btree_iter *, unsigned long);
 
 static int __btree_iter_traverse_all(struct btree_trans *trans, int ret)
 {
@@ -1109,6 +1127,7 @@ static int __btree_iter_traverse_all(struct btree_trans *trans, int ret)
 	bubble_sort(sorted, nr_sorted, btree_iter_cmp_by_idx);
 #undef btree_iter_cmp_by_idx
 	bch2_trans_unlock(trans);
+	cond_resched();
 
 	if (unlikely(ret == -ENOMEM)) {
 		struct closure cl;
@@ -1139,7 +1158,7 @@ static int __btree_iter_traverse_all(struct btree_trans *trans, int ret)
 		if (!(trans->iters_linked & (1ULL << idx)))
 			continue;
 
-		ret = btree_iter_traverse_one(&trans->iters[idx]);
+		ret = btree_iter_traverse_one(&trans->iters[idx], _THIS_IP_);
 		if (ret)
 			goto retry_all;
 	}
@@ -1202,7 +1221,8 @@ static inline unsigned btree_iter_up_until_good_node(struct btree_iter *iter,
  * On error, caller (peek_node()/peek_key()) must return NULL; the error is
  * stashed in the iterator and returned from bch2_trans_exit().
  */
-static int btree_iter_traverse_one(struct btree_iter *iter)
+static int btree_iter_traverse_one(struct btree_iter *iter,
+				   unsigned long trace_ip)
 {
 	unsigned depth_want = iter->level;
@@ -1249,8 +1269,8 @@ static int btree_iter_traverse_one(struct btree_iter *iter)
 	 */
 	while (iter->level > depth_want) {
 		int ret = btree_iter_node(iter, iter->level)
-			? btree_iter_down(iter)
-			: btree_iter_lock_root(iter, depth_want);
+			? btree_iter_down(iter, trace_ip)
+			: btree_iter_lock_root(iter, depth_want, trace_ip);
 		if (unlikely(ret)) {
 			if (ret == 1)
 				return 0;
@@ -1281,7 +1301,7 @@ int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter)
 	int ret;
 
 	ret   = bch2_trans_cond_resched(trans) ?:
-		btree_iter_traverse_one(iter);
+		btree_iter_traverse_one(iter, _RET_IP_);
 	if (unlikely(ret))
 		ret = __btree_iter_traverse_all(trans, ret);
...
@@ -242,7 +242,7 @@ int bch2_btree_iter_traverse_cached(struct btree_iter *iter)
 		enum six_lock_type lock_want = __btree_lock_want(iter, 0);
 
 		if (!btree_node_lock((void *) ck, iter->pos, 0, iter, lock_want,
-				     bkey_cached_check_fn, iter)) {
+				     bkey_cached_check_fn, iter, _THIS_IP_)) {
 			if (ck->key.btree_id != iter->btree_id ||
 			    bkey_cmp(ck->key.pos, iter->pos)) {
 				goto retry;
...
@@ -175,13 +175,15 @@ static inline bool btree_node_lock_increment(struct btree_trans *trans,
 bool __bch2_btree_node_lock(struct btree *, struct bpos, unsigned,
 			    struct btree_iter *, enum six_lock_type,
-			    six_lock_should_sleep_fn, void *);
+			    six_lock_should_sleep_fn, void *,
+			    unsigned long);
 
 static inline bool btree_node_lock(struct btree *b,
 				   struct bpos pos, unsigned level,
 				   struct btree_iter *iter,
 				   enum six_lock_type type,
-				   six_lock_should_sleep_fn should_sleep_fn, void *p)
+				   six_lock_should_sleep_fn should_sleep_fn, void *p,
+				   unsigned long ip)
 {
 	struct btree_trans *trans = iter->trans;
 	bool ret;
@@ -199,7 +201,7 @@ static inline bool btree_node_lock(struct btree *b,
 	ret = likely(six_trylock_type(&b->c.lock, type)) ||
 		btree_node_lock_increment(trans, b, level, type) ||
 		__bch2_btree_node_lock(b, pos, level, iter, type,
-				       should_sleep_fn, p);
+				       should_sleep_fn, p, ip);
 #ifdef CONFIG_BCACHEFS_DEBUG
 	trans->locking = NULL;
...
@@ -536,9 +536,46 @@ DEFINE_EVENT(transaction_restart, trans_restart_btree_node_reused,
 	TP_ARGS(ip)
 );
 
-DEFINE_EVENT(transaction_restart, trans_restart_would_deadlock,
-	TP_PROTO(unsigned long ip),
-	TP_ARGS(ip)
+TRACE_EVENT(trans_restart_would_deadlock,
+	TP_PROTO(unsigned long	trans_ip,
+		 unsigned long	caller_ip,
+		 unsigned	reason,
+		 enum btree_id	have_btree_id,
+		 unsigned	have_iter_type,
+		 enum btree_id	want_btree_id,
+		 unsigned	want_iter_type),
+	TP_ARGS(trans_ip, caller_ip, reason,
+		have_btree_id, have_iter_type,
+		want_btree_id, want_iter_type),
+
+	TP_STRUCT__entry(
+		__field(unsigned long,	trans_ip	)
+		__field(unsigned long,	caller_ip	)
+		__field(u8,		reason		)
+		__field(u8,		have_btree_id	)
+		__field(u8,		have_iter_type	)
+		__field(u8,		want_btree_id	)
+		__field(u8,		want_iter_type	)
+	),
+
+	TP_fast_assign(
+		__entry->trans_ip	= trans_ip;
+		__entry->caller_ip	= caller_ip;
+		__entry->reason		= reason;
+		__entry->have_btree_id	= have_btree_id;
+		__entry->have_iter_type	= have_iter_type;
+		__entry->want_btree_id	= want_btree_id;
+		__entry->want_iter_type	= want_iter_type;
+	),
+
+	TP_printk("%pF %pF because %u have %u:%u want %u:%u",
+		  (void *) __entry->trans_ip,
+		  (void *) __entry->caller_ip,
+		  __entry->reason,
+		  __entry->have_btree_id,
+		  __entry->have_iter_type,
+		  __entry->want_btree_id,
+		  __entry->want_iter_type)
 );
 
 TRACE_EVENT(trans_restart_iters_realloced,
...
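
A possible follow-up, not part of this patch: the tracing core's
__print_symbolic() helper can render the numeric reason as a string when the
trace buffer is read. A sketch of a drop-in replacement for the TP_printk()
above, reusing the inferred reason meanings:

	TP_printk("%pF %pF because %s have %u:%u want %u:%u",
		  (void *) __entry->trans_ip,
		  (void *) __entry->caller_ip,
		  __print_symbolic(__entry->reason,
				   { 1, "linked relock after upgrade failed" },
				   { 2, "linked iter not upgradable" },
				   { 3, "relock after upgrade failed" },
				   { 4, "iter not upgradable" },
				   { 5, "btree lock ordering" },
				   { 6, "key order within btree" }),
		  __entry->have_btree_id,
		  __entry->have_iter_type,
		  __entry->want_btree_id,
		  __entry->want_iter_type)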