Commit b3f5620f authored by Linus Torvalds

Merge tag 'bcachefs-2024-08-08' of git://evilpiepirate.org/bcachefs

Pull bcachefs fixes from Kent Overstreet:
 "Assorted little stuff:

   - lockdep fixup for lockdep_set_notrack_class()

   - we can now remove a device when using erasure coding without
     deadlocking, though we still hit other issues

   - the 'allocator stuck' timeout is now configurable, and messages are
     ratelimited. The default timeout has been increased from 10 seconds
     to 30"

* tag 'bcachefs-2024-08-08' of git://evilpiepirate.org/bcachefs:
  bcachefs: Use bch2_wait_on_allocator() in btree node alloc path
  bcachefs: Make allocator stuck timeout configurable, ratelimit messages
  bcachefs: Add missing path_traverse() to btree_iter_next_node()
  bcachefs: ec should not allocate from ro devs
  bcachefs: Improved allocator debugging for ec
  bcachefs: Add missing bch2_trans_begin() call
  bcachefs: Add a comment for bucket helper types
  bcachefs: Don't rely on implicit unsigned -> signed integer conversion
  lockdep: Fix lockdep_set_notrack_class() for CONFIG_LOCK_STAT
  bcachefs: Fix double free of ca->buckets_nouse
parents cb5b81bc 73dc1656
@@ -82,6 +82,14 @@ static inline bool bucket_data_type_mismatch(enum bch_data_type bucket,
 	       bucket_data_type(bucket) != bucket_data_type(ptr);
 }
 
+/*
+ * It is my general preference to use unsigned types for unsigned quantities -
+ * however, these helpers are used in disk accounting calculations run by
+ * triggers where the output will be negated and added to an s64. unsigned is
+ * right out even though all these quantities will fit in 32 bits, since it
+ * won't be sign extended correctly; u64 will negate "correctly", but s64 is the
+ * simpler option here.
+ */
 static inline s64 bch2_bucket_sectors_total(struct bch_alloc_v4 a)
 {
 	return a.stripe_sectors + a.dirty_sectors + a.cached_sectors;
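The comment above is the rationale behind the "Don't rely on implicit unsigned -> signed integer conversion" change in the shortlog. A standalone illustration of the pitfall (not from the patch; userspace C with stdint types standing in for the kernel's u32/s64, assuming the usual 32-bit int):

```c
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t sectors = 8;	/* an unsigned sector count, as the helpers once returned */
	int64_t  usage   = 1000;

	/*
	 * Negating a 32-bit unsigned value wraps modulo 2^32 instead of going
	 * negative: -sectors == 4294967288. Converting that to int64_t does
	 * not sign-extend, so the intended subtraction adds ~4 billion.
	 */
	usage += -sectors;
	printf("u32 negate: %lld\n", (long long)usage);	/* 4294968288 */

	/* With s64 (here int64_t) the negation produces -8, as intended. */
	usage = 1000;
	usage += -(int64_t)sectors;
	printf("s64 negate: %lld\n", (long long)usage);	/* 992 */

	return 0;
}
```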
@@ -166,7 +174,7 @@ static inline u64 alloc_lru_idx_fragmentation(struct bch_alloc_v4 a,
 	 * avoid overflowing LRU_TIME_BITS on a corrupted fs, when
 	 * bucket_sectors_dirty is (much) bigger than bucket_size
 	 */
-	u64 d = min(bch2_bucket_sectors_dirty(a),
+	u64 d = min_t(s64, bch2_bucket_sectors_dirty(a),
 		    ca->mi.bucket_size);
 
 	return div_u64(d * (1ULL << 31), ca->mi.bucket_size);
...
@@ -1603,7 +1603,8 @@ void bch2_open_bucket_to_text(struct printbuf *out, struct bch_fs *c, struct open_bucket *ob)
 	prt_newline(out);
 }
 
-void bch2_open_buckets_to_text(struct printbuf *out, struct bch_fs *c)
+void bch2_open_buckets_to_text(struct printbuf *out, struct bch_fs *c,
+			       struct bch_dev *ca)
 {
 	struct open_bucket *ob;
 
@@ -1613,7 +1614,8 @@ void bch2_open_buckets_to_text(struct printbuf *out, struct bch_fs *c)
 	     ob < c->open_buckets + ARRAY_SIZE(c->open_buckets);
 	     ob++) {
 		spin_lock(&ob->lock);
-		if (ob->valid && !ob->on_partial_list)
+		if (ob->valid && !ob->on_partial_list &&
+		    (!ca || ob->dev == ca->dev_idx))
 			bch2_open_bucket_to_text(out, c, ob);
 		spin_unlock(&ob->lock);
 	}
 
@@ -1756,11 +1758,12 @@ void bch2_dev_alloc_debug_to_text(struct printbuf *out, struct bch_dev *ca)
 	prt_printf(out, "buckets to invalidate\t%llu\r\n", should_invalidate_buckets(ca, stats));
 }
 
-void bch2_print_allocator_stuck(struct bch_fs *c)
+static noinline void bch2_print_allocator_stuck(struct bch_fs *c)
 {
 	struct printbuf buf = PRINTBUF;
 
-	prt_printf(&buf, "Allocator stuck? Waited for 10 seconds\n");
+	prt_printf(&buf, "Allocator stuck? Waited for %u seconds\n",
+		   c->opts.allocator_stuck_timeout);
 
 	prt_printf(&buf, "Allocator debug:\n");
 	printbuf_indent_add(&buf, 2);
 
@@ -1790,3 +1793,24 @@ void bch2_print_allocator_stuck(struct bch_fs *c)
 	bch2_print_string_as_lines(KERN_ERR, buf.buf);
 	printbuf_exit(&buf);
 }
+
+static inline unsigned allocator_wait_timeout(struct bch_fs *c)
+{
+	if (c->allocator_last_stuck &&
+	    time_after(c->allocator_last_stuck + HZ * 60 * 2, jiffies))
+		return 0;
+
+	return c->opts.allocator_stuck_timeout * HZ;
+}
+
+void __bch2_wait_on_allocator(struct bch_fs *c, struct closure *cl)
+{
+	unsigned t = allocator_wait_timeout(c);
+
+	if (t && closure_sync_timeout(cl, t)) {
+		c->allocator_last_stuck = jiffies;
+		bch2_print_allocator_stuck(c);
+	}
+
+	closure_sync(cl);
+}
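For context, the three call sites converted later in this merge (the btree node allocation path, extent fallocate, and the write path) all follow the same before/after shape; a sketch of the pattern, assembled from the hunks below:

```c
/* Before: a hard-coded 10 second wait, report always printed on timeout. */
if (closure_sync_timeout(&cl, HZ * 10)) {
	bch2_print_allocator_stuck(c);
	closure_sync(&cl);
}

/*
 * After: one helper. bch2_wait_on_allocator() is a no-op if the closure was
 * never armed (closure_get_happened is false). Otherwise it waits for the
 * configurable timeout, and allocator_wait_timeout() returns 0 for two
 * minutes after the last report, suppressing both the timed wait and the
 * message; this is the ratelimiting mentioned in the pull request text.
 */
bch2_wait_on_allocator(c, &cl);
```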
@@ -223,7 +223,7 @@ static inline struct write_point_specifier writepoint_ptr(struct write_point *wp
 void bch2_fs_allocator_foreground_init(struct bch_fs *);
 
 void bch2_open_bucket_to_text(struct printbuf *, struct bch_fs *, struct open_bucket *);
-void bch2_open_buckets_to_text(struct printbuf *, struct bch_fs *);
+void bch2_open_buckets_to_text(struct printbuf *, struct bch_fs *, struct bch_dev *);
 void bch2_open_buckets_partial_to_text(struct printbuf *, struct bch_fs *);
 
 void bch2_write_points_to_text(struct printbuf *, struct bch_fs *);
@@ -231,6 +231,11 @@ void bch2_write_points_to_text(struct printbuf *, struct bch_fs *);
 void bch2_fs_alloc_debug_to_text(struct printbuf *, struct bch_fs *);
 void bch2_dev_alloc_debug_to_text(struct printbuf *, struct bch_dev *);
 
-void bch2_print_allocator_stuck(struct bch_fs *);
+void __bch2_wait_on_allocator(struct bch_fs *, struct closure *);
+
+static inline void bch2_wait_on_allocator(struct bch_fs *c, struct closure *cl)
+{
+	if (cl->closure_get_happened)
+		__bch2_wait_on_allocator(c, cl);
+}
 
 #endif /* _BCACHEFS_ALLOC_FOREGROUND_H */
@@ -893,6 +893,8 @@ struct bch_fs {
 	struct bch_fs_usage_base __percpu *usage;
 	u64 __percpu		*online_reserved;
 
+	unsigned long		allocator_last_stuck;
+
 	struct io_clock		io_clock[2];
 
 	/* JOURNAL SEQ BLACKLIST */
...
@@ -836,6 +836,8 @@ LE64_BITMASK(BCH_SB_BACKGROUND_COMPRESSION_TYPE_HI,
 LE64_BITMASK(BCH_SB_VERSION_UPGRADE_COMPLETE,
 					struct bch_sb, flags[5],  0, 16);
+LE64_BITMASK(BCH_SB_ALLOCATOR_STUCK_TIMEOUT,
+					struct bch_sb, flags[5], 16, 32);
 
 static inline __u64 BCH_SB_COMPRESSION_TYPE(const struct bch_sb *sb)
 {
...
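LE64_BITMASK stores the new option in bits 16..31 of the little-endian superblock word flags[5], directly above BCH_SB_VERSION_UPGRADE_COMPLETE in bits 0..15, so no new superblock field is needed. Roughly what the generated accessors do (a simplified sketch using plain integers, not the literal macro expansion; the real helpers take a struct bch_sb and use the kernel's le64 conversion routines):

```c
#include <stdint.h>

static inline uint64_t allocator_stuck_timeout_get(uint64_t flags5)
{
	return (flags5 >> 16) & ~(~0ULL << (32 - 16));	/* extract bits [16, 32) */
}

static inline uint64_t allocator_stuck_timeout_set(uint64_t flags5, uint64_t v)
{
	uint64_t mask = ~(~0ULL << (32 - 16)) << 16;	/* 0x00000000ffff0000 */

	return (flags5 & ~mask) | ((v << 16) & mask);
}
```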
@@ -1921,6 +1921,11 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter)
 	bch2_trans_verify_not_in_restart(trans);
 	bch2_btree_iter_verify(iter);
 
+	ret = bch2_btree_path_traverse(trans, iter->path, iter->flags);
+	if (ret)
+		goto err;
+
 	struct btree_path *path = btree_iter_path(trans, iter);
 
 	/* already at end? */
...
@@ -1264,7 +1264,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
 			ret = bch2_btree_reserve_get(trans, as, nr_nodes, flags, &cl);
 
 			bch2_trans_unlock(trans);
-			closure_sync(&cl);
+			bch2_wait_on_allocator(c, &cl);
 		} while (bch2_err_matches(ret, BCH_ERR_operation_blocked));
 	}
...
@@ -1809,6 +1809,9 @@ static int new_stripe_alloc_buckets(struct btree_trans *trans, struct ec_stripe_head *h,
 	BUG_ON(v->nr_blocks	!= h->s->nr_data + h->s->nr_parity);
 	BUG_ON(v->nr_redundant	!= h->s->nr_parity);
 
+	/* * We bypass the sector allocator which normally does this: */
+	bitmap_and(devs.d, devs.d, c->rw_devs[BCH_DATA_user].d, BCH_SB_MEMBERS_MAX);
+
 	for_each_set_bit(i, h->s->blocks_gotten, v->nr_blocks) {
 		__clear_bit(v->ptrs[i].dev, devs.d);
 		if (i < h->s->nr_data)
...
@@ -2235,6 +2238,23 @@ void bch2_stripes_heap_to_text(struct printbuf *out, struct bch_fs *c)
 	mutex_unlock(&c->ec_stripes_heap_lock);
 }
 
+static void bch2_new_stripe_to_text(struct printbuf *out, struct bch_fs *c,
+				    struct ec_stripe_new *s)
+{
+	prt_printf(out, "\tidx %llu blocks %u+%u allocated %u ref %u %u %s obs",
+		   s->idx, s->nr_data, s->nr_parity,
+		   bitmap_weight(s->blocks_allocated, s->nr_data),
+		   atomic_read(&s->ref[STRIPE_REF_io]),
+		   atomic_read(&s->ref[STRIPE_REF_stripe]),
+		   bch2_watermarks[s->h->watermark]);
+
+	struct bch_stripe *v = &bkey_i_to_stripe(&s->new_stripe.key)->v;
+	unsigned i;
+	for_each_set_bit(i, s->blocks_gotten, v->nr_blocks)
+		prt_printf(out, " %u", s->blocks[i]);
+	prt_newline(out);
+}
+
 void bch2_new_stripes_to_text(struct printbuf *out, struct bch_fs *c)
 {
 	struct ec_stripe_head *h;
@@ -2247,23 +2267,15 @@ void bch2_new_stripes_to_text(struct printbuf *out, struct bch_fs *c)
 			   bch2_watermarks[h->watermark]);
 
 		if (h->s)
-			prt_printf(out, "\tidx %llu blocks %u+%u allocated %u\n",
-				   h->s->idx, h->s->nr_data, h->s->nr_parity,
-				   bitmap_weight(h->s->blocks_allocated,
-						 h->s->nr_data));
+			bch2_new_stripe_to_text(out, c, h->s);
 	}
 	mutex_unlock(&c->ec_stripe_head_lock);
 
 	prt_printf(out, "in flight:\n");
 
 	mutex_lock(&c->ec_stripe_new_lock);
-	list_for_each_entry(s, &c->ec_stripe_new_list, list) {
-		prt_printf(out, "\tidx %llu blocks %u+%u ref %u %u %s\n",
-			   s->idx, s->nr_data, s->nr_parity,
-			   atomic_read(&s->ref[STRIPE_REF_io]),
-			   atomic_read(&s->ref[STRIPE_REF_stripe]),
-			   bch2_watermarks[s->h->watermark]);
-	}
+	list_for_each_entry(s, &c->ec_stripe_new_list, list)
+		bch2_new_stripe_to_text(out, c, s);
 	mutex_unlock(&c->ec_stripe_new_lock);
 }
...
@@ -126,11 +126,7 @@ int bch2_extent_fallocate(struct btree_trans *trans,
 
 	if (closure_nr_remaining(&cl) != 1) {
 		bch2_trans_unlock_long(trans);
-
-		if (closure_sync_timeout(&cl, HZ * 10)) {
-			bch2_print_allocator_stuck(c);
-			closure_sync(&cl);
-		}
+		bch2_wait_on_allocator(c, &cl);
 	}
 
 	return ret;
...
@@ -406,6 +406,7 @@ static void bch2_read_retry_nodecode(struct bch_fs *c, struct bch_read_bio *rbio
 	bch2_trans_iter_init(trans, &iter, rbio->data_btree,
 			     rbio->read_pos, BTREE_ITER_slots);
 retry:
+	bch2_trans_begin(trans);
 	rbio->bio.bi_status = 0;
 
 	k = bch2_btree_iter_peek_slot(&iter);
...
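This one-line fix restores the invariant that every bcachefs transaction-restart loop begins each pass with bch2_trans_begin(); without it, a restart loops back to retry: and runs the iterator on stale, un-reset transaction state. The canonical loop shape (a fragment for illustration, not taken verbatim from the patch):

```c
retry:
	bch2_trans_begin(trans);	/* reset state left over from the failed pass */

	k = bch2_btree_iter_peek_slot(&iter);
	ret = bkey_err(k);
	if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
		goto retry;
```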
@@ -1503,10 +1503,7 @@ static void __bch2_write(struct bch_write_op *op)
 	if ((op->flags & BCH_WRITE_SYNC) ||
 	    (!(op->flags & BCH_WRITE_SUBMITTED) &&
 	     !(op->flags & BCH_WRITE_IN_WORKER))) {
-		if (closure_sync_timeout(&op->cl, HZ * 10)) {
-			bch2_print_allocator_stuck(c);
-			closure_sync(&op->cl);
-		}
+		bch2_wait_on_allocator(c, &op->cl);
 
 		__bch2_write_index(op);
...
@@ -391,6 +391,11 @@ enum fsck_err_opts {
 	  OPT_BOOL(),							\
 	  BCH_SB_JOURNAL_TRANSACTION_NAMES,	true,			\
 	  NULL,		"Log transaction function names in journal")	\
+	x(allocator_stuck_timeout,	u16,				\
+	  OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME,			\
+	  OPT_UINT(0, U16_MAX),						\
+	  BCH_SB_ALLOCATOR_STUCK_TIMEOUT, 30,				\
+	  NULL,		"Default timeout in seconds for stuck allocator messages")\
 	x(noexcl,			u8,				\
 	  OPT_FS|OPT_MOUNT,						\
 	  OPT_BOOL(),							\
...
@@ -414,6 +414,10 @@ static int bch2_sb_validate(struct bch_sb_handle *disk_sb,
 		if (!BCH_SB_VERSION_UPGRADE_COMPLETE(sb))
 			SET_BCH_SB_VERSION_UPGRADE_COMPLETE(sb, le16_to_cpu(sb->version));
+
+		if (le16_to_cpu(sb->version) <= bcachefs_metadata_version_disk_accounting_v2 &&
+		    !BCH_SB_ALLOCATOR_STUCK_TIMEOUT(sb))
+			SET_BCH_SB_ALLOCATOR_STUCK_TIMEOUT(sb, 30);
 	}
 
 	for (opt_id = 0; opt_id < bch2_opts_nr; opt_id++) {
...
@@ -1193,7 +1193,6 @@ static void bch2_dev_free(struct bch_dev *ca)
 	if (ca->kobj.state_in_sysfs)
 		kobject_del(&ca->kobj);
 
-	kfree(ca->buckets_nouse);
 	bch2_free_super(&ca->disk_sb);
 	bch2_dev_allocator_background_exit(ca);
 	bch2_dev_journal_exit(ca);
...
@@ -367,7 +367,7 @@ SHOW(bch2_fs)
 		bch2_stripes_heap_to_text(out, c);
 
 	if (attr == &sysfs_open_buckets)
-		bch2_open_buckets_to_text(out, c);
+		bch2_open_buckets_to_text(out, c, NULL);
 
 	if (attr == &sysfs_open_buckets_partial)
 		bch2_open_buckets_partial_to_text(out, c);
@@ -811,6 +811,9 @@ SHOW(bch2_dev)
 	if (attr == &sysfs_alloc_debug)
 		bch2_dev_alloc_debug_to_text(out, ca);
 
+	if (attr == &sysfs_open_buckets)
+		bch2_open_buckets_to_text(out, c, ca);
+
 	return 0;
 }
@@ -892,6 +895,7 @@ struct attribute *bch2_dev_files[] = {
 	/* debug: */
 	&sysfs_alloc_debug,
+	&sysfs_open_buckets,
 	NULL
 };
...
@@ -5936,6 +5936,9 @@ __lock_contended(struct lockdep_map *lock, unsigned long ip)
 	if (DEBUG_LOCKS_WARN_ON(!depth))
 		return;
 
+	if (unlikely(lock->key == &__lockdep_no_track__))
+		return;
+
 	hlock = find_held_lock(curr, lock, depth, &i);
 	if (!hlock) {
 		print_lock_contention_bug(curr, lock, ip);
@@ -5978,6 +5981,9 @@ __lock_acquired(struct lockdep_map *lock, unsigned long ip)
 	if (DEBUG_LOCKS_WARN_ON(!depth))
 		return;
 
+	if (unlikely(lock->key == &__lockdep_no_track__))
+		return;
+
 	hlock = find_held_lock(curr, lock, depth, &i);
 	if (!hlock) {
 		print_lock_contention_bug(curr, lock, _RET_IP_);
...
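The acquire/release fast paths already bail out early for locks whose class key is __lockdep_no_track__; these two hunks add the same guard to the CONFIG_LOCK_STAT-only hooks, which would otherwise call find_held_lock() for a lock that was never recorded and print a bogus contention bug. A usage sketch, assuming lockdep_set_notrack_class() follows the usual lockdep_set_class() calling convention of taking the lock object itself:

```c
#include <linux/lockdep.h>
#include <linux/mutex.h>

static struct mutex untracked_lock;

static void init_untracked(void)
{
	mutex_init(&untracked_lock);
	/*
	 * Opt this lock out of lockdep tracking entirely; with this fix the
	 * CONFIG_LOCK_STAT contention/acquired hooks now also skip it.
	 */
	lockdep_set_notrack_class(&untracked_lock);
}
```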