Commit 0b4989eb authored by Linus Torvalds

Merge tag 'bcachefs-2024-06-12' of https://evilpiepirate.org/git/bcachefs

Pull bcachefs fixes from Kent Overstreet:

 - fix kworker explosion, due to calling submit_bio() (which can block)
   from a multithreaded workqueue

 - fix error handling in btree node scan

 - forward compat fix: kill an old debug assert

 - key cache shrinker fixes

   This is a partial fix for stalls doing multithreaded creates - there
   were various O(n^2) issues the key cache shrinker was hitting [1].

   There's more work coming here; I'm working on a patch to delete the
   key cache lock, which initial testing shows to be a pretty drastic
   performance improvement.

 - assorted syzbot fixes

Link: https://lore.kernel.org/linux-bcachefs/CAGudoHGenxzk0ZqPXXi1_QDbfqQhGHu+wUwzyS6WmfkUZ1HiXA@mail.gmail.com/ [1]

* tag 'bcachefs-2024-06-12' of https://evilpiepirate.org/git/bcachefs:
  bcachefs: Fix rcu_read_lock() leak in drop_extra_replicas
  bcachefs: Add missing bch_inode_info.ei_flags init
  bcachefs: Add missing synchronize_srcu_expedited() call when shutting down
  bcachefs: Check for invalid bucket from bucket_gen(), gc_bucket()
  bcachefs: Replace bucket_valid() asserts in bucket lookup with proper checks
  bcachefs: Fix snapshot_create_lock lock ordering
  bcachefs: Fix refcount leak in check_fix_ptrs()
  bcachefs: Leave a buffer in the btree key cache to avoid lock thrashing
  bcachefs: Fix reporting of freed objects from key cache shrinker
  bcachefs: set sb->s_shrinker->seeks = 0
  bcachefs: increase key cache shrinker batch size
  bcachefs: Enable automatic shrinking for rhashtables
  bcachefs: fix the display format for show-super
  bcachefs: fix stack frame size in fsck.c
  bcachefs: Delete incorrect BTREE_ID_NR assertion
  bcachefs: Fix incorrect error handling found_btree_node_is_readable()
  bcachefs: Split out btree_write_submit_wq
parents cea2a265 f2736b9c
...@@ -741,6 +741,7 @@ int bch2_trigger_alloc(struct btree_trans *trans, ...@@ -741,6 +741,7 @@ int bch2_trigger_alloc(struct btree_trans *trans,
enum btree_iter_update_trigger_flags flags) enum btree_iter_update_trigger_flags flags)
{ {
struct bch_fs *c = trans->c; struct bch_fs *c = trans->c;
struct printbuf buf = PRINTBUF;
int ret = 0; int ret = 0;
struct bch_dev *ca = bch2_dev_bucket_tryget(c, new.k->p); struct bch_dev *ca = bch2_dev_bucket_tryget(c, new.k->p);
...@@ -860,8 +861,14 @@ int bch2_trigger_alloc(struct btree_trans *trans, ...@@ -860,8 +861,14 @@ int bch2_trigger_alloc(struct btree_trans *trans,
} }
percpu_down_read(&c->mark_lock); percpu_down_read(&c->mark_lock);
if (new_a->gen != old_a->gen) if (new_a->gen != old_a->gen) {
*bucket_gen(ca, new.k->p.offset) = new_a->gen; u8 *gen = bucket_gen(ca, new.k->p.offset);
if (unlikely(!gen)) {
percpu_up_read(&c->mark_lock);
goto invalid_bucket;
}
*gen = new_a->gen;
}
bch2_dev_usage_update(c, ca, old_a, new_a, journal_seq, false); bch2_dev_usage_update(c, ca, old_a, new_a, journal_seq, false);
percpu_up_read(&c->mark_lock); percpu_up_read(&c->mark_lock);
...@@ -895,6 +902,11 @@ int bch2_trigger_alloc(struct btree_trans *trans, ...@@ -895,6 +902,11 @@ int bch2_trigger_alloc(struct btree_trans *trans,
percpu_down_read(&c->mark_lock); percpu_down_read(&c->mark_lock);
struct bucket *g = gc_bucket(ca, new.k->p.offset); struct bucket *g = gc_bucket(ca, new.k->p.offset);
if (unlikely(!g)) {
percpu_up_read(&c->mark_lock);
goto invalid_bucket;
}
g->gen_valid = 1;
bucket_lock(g); bucket_lock(g);
...@@ -910,8 +922,14 @@ int bch2_trigger_alloc(struct btree_trans *trans, ...@@ -910,8 +922,14 @@ int bch2_trigger_alloc(struct btree_trans *trans,
percpu_up_read(&c->mark_lock); percpu_up_read(&c->mark_lock);
} }
err: err:
printbuf_exit(&buf);
bch2_dev_put(ca); bch2_dev_put(ca);
return ret; return ret;
invalid_bucket:
bch2_fs_inconsistent(c, "reference to invalid bucket\n %s",
(bch2_bkey_val_to_text(&buf, c, new.s_c), buf.buf));
ret = -EIO;
goto err;
} }
/* /*
......
...@@ -790,7 +790,8 @@ struct bch_fs { ...@@ -790,7 +790,8 @@ struct bch_fs {
/* BTREE CACHE */ /* BTREE CACHE */
struct bio_set btree_bio; struct bio_set btree_bio;
struct workqueue_struct *io_complete_wq; struct workqueue_struct *btree_read_complete_wq;
struct workqueue_struct *btree_write_submit_wq;
struct btree_root btree_roots_known[BTREE_ID_NR]; struct btree_root btree_roots_known[BTREE_ID_NR];
DARRAY(struct btree_root) btree_roots_extra; DARRAY(struct btree_root) btree_roots_extra;
......
...@@ -91,10 +91,11 @@ static int bch2_btree_cache_cmp_fn(struct rhashtable_compare_arg *arg, ...@@ -91,10 +91,11 @@ static int bch2_btree_cache_cmp_fn(struct rhashtable_compare_arg *arg,
} }
static const struct rhashtable_params bch_btree_cache_params = { static const struct rhashtable_params bch_btree_cache_params = {
.head_offset = offsetof(struct btree, hash), .head_offset = offsetof(struct btree, hash),
.key_offset = offsetof(struct btree, hash_val), .key_offset = offsetof(struct btree, hash_val),
.key_len = sizeof(u64), .key_len = sizeof(u64),
.obj_cmpfn = bch2_btree_cache_cmp_fn, .obj_cmpfn = bch2_btree_cache_cmp_fn,
.automatic_shrinking = true,
}; };
static int btree_node_data_alloc(struct bch_fs *c, struct btree *b, gfp_t gfp) static int btree_node_data_alloc(struct bch_fs *c, struct btree *b, gfp_t gfp)
......
...@@ -874,6 +874,9 @@ static int bch2_alloc_write_key(struct btree_trans *trans, ...@@ -874,6 +874,9 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
const struct bch_alloc_v4 *old; const struct bch_alloc_v4 *old;
int ret; int ret;
if (!bucket_valid(ca, k.k->p.offset))
return 0;
old = bch2_alloc_to_v4(k, &old_convert); old = bch2_alloc_to_v4(k, &old_convert);
gc = new = *old; gc = new = *old;
...@@ -990,6 +993,8 @@ static int bch2_gc_alloc_start(struct bch_fs *c) ...@@ -990,6 +993,8 @@ static int bch2_gc_alloc_start(struct bch_fs *c)
buckets->first_bucket = ca->mi.first_bucket; buckets->first_bucket = ca->mi.first_bucket;
buckets->nbuckets = ca->mi.nbuckets; buckets->nbuckets = ca->mi.nbuckets;
buckets->nbuckets_minus_first =
buckets->nbuckets - buckets->first_bucket;
rcu_assign_pointer(ca->buckets_gc, buckets); rcu_assign_pointer(ca->buckets_gc, buckets);
} }
...@@ -1003,12 +1008,14 @@ static int bch2_gc_alloc_start(struct bch_fs *c) ...@@ -1003,12 +1008,14 @@ static int bch2_gc_alloc_start(struct bch_fs *c)
continue; continue;
} }
struct bch_alloc_v4 a_convert; if (bucket_valid(ca, k.k->p.offset)) {
const struct bch_alloc_v4 *a = bch2_alloc_to_v4(k, &a_convert); struct bch_alloc_v4 a_convert;
const struct bch_alloc_v4 *a = bch2_alloc_to_v4(k, &a_convert);
struct bucket *g = gc_bucket(ca, k.k->p.offset); struct bucket *g = gc_bucket(ca, k.k->p.offset);
g->gen_valid = 1; g->gen_valid = 1;
g->gen = a->gen; g->gen = a->gen;
}
0; 0;
}))); })));
bch2_dev_put(ca); bch2_dev_put(ca);
......
...@@ -1389,7 +1389,7 @@ static void btree_node_read_endio(struct bio *bio) ...@@ -1389,7 +1389,7 @@ static void btree_node_read_endio(struct bio *bio)
bch2_latency_acct(ca, rb->start_time, READ); bch2_latency_acct(ca, rb->start_time, READ);
} }
queue_work(c->io_complete_wq, &rb->work); queue_work(c->btree_read_complete_wq, &rb->work);
} }
struct btree_node_read_all { struct btree_node_read_all {
...@@ -1656,7 +1656,7 @@ static int btree_node_read_all_replicas(struct bch_fs *c, struct btree *b, bool ...@@ -1656,7 +1656,7 @@ static int btree_node_read_all_replicas(struct bch_fs *c, struct btree *b, bool
btree_node_read_all_replicas_done(&ra->cl.work); btree_node_read_all_replicas_done(&ra->cl.work);
} else { } else {
continue_at(&ra->cl, btree_node_read_all_replicas_done, continue_at(&ra->cl, btree_node_read_all_replicas_done,
c->io_complete_wq); c->btree_read_complete_wq);
} }
return 0; return 0;
...@@ -1737,7 +1737,7 @@ void bch2_btree_node_read(struct btree_trans *trans, struct btree *b, ...@@ -1737,7 +1737,7 @@ void bch2_btree_node_read(struct btree_trans *trans, struct btree *b,
if (sync) if (sync)
btree_node_read_work(&rb->work); btree_node_read_work(&rb->work);
else else
queue_work(c->io_complete_wq, &rb->work); queue_work(c->btree_read_complete_wq, &rb->work);
} }
} }
...@@ -2229,7 +2229,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, unsigned flags) ...@@ -2229,7 +2229,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, unsigned flags)
atomic64_add(bytes_to_write, &c->btree_write_stats[type].bytes); atomic64_add(bytes_to_write, &c->btree_write_stats[type].bytes);
INIT_WORK(&wbio->work, btree_write_submit); INIT_WORK(&wbio->work, btree_write_submit);
queue_work(c->io_complete_wq, &wbio->work); queue_work(c->btree_write_submit_wq, &wbio->work);
return; return;
err: err:
set_btree_node_noevict(b); set_btree_node_noevict(b);
......
...@@ -221,11 +221,8 @@ static void bch2_btree_path_verify(struct btree_trans *trans, ...@@ -221,11 +221,8 @@ static void bch2_btree_path_verify(struct btree_trans *trans,
struct btree_path *path) struct btree_path *path)
{ {
struct bch_fs *c = trans->c; struct bch_fs *c = trans->c;
unsigned i;
EBUG_ON(path->btree_id >= BTREE_ID_NR);
for (i = 0; i < (!path->cached ? BTREE_MAX_DEPTH : 1); i++) { for (unsigned i = 0; i < (!path->cached ? BTREE_MAX_DEPTH : 1); i++) {
if (!path->l[i].b) { if (!path->l[i].b) {
BUG_ON(!path->cached && BUG_ON(!path->cached &&
bch2_btree_id_root(c, path->btree_id)->b->c.level > i); bch2_btree_id_root(c, path->btree_id)->b->c.level > i);
...@@ -251,8 +248,6 @@ static void bch2_btree_iter_verify(struct btree_iter *iter) ...@@ -251,8 +248,6 @@ static void bch2_btree_iter_verify(struct btree_iter *iter)
{ {
struct btree_trans *trans = iter->trans; struct btree_trans *trans = iter->trans;
BUG_ON(iter->btree_id >= BTREE_ID_NR);
BUG_ON(!!(iter->flags & BTREE_ITER_cached) != btree_iter_path(trans, iter)->cached); BUG_ON(!!(iter->flags & BTREE_ITER_cached) != btree_iter_path(trans, iter)->cached);
BUG_ON((iter->flags & BTREE_ITER_is_extents) && BUG_ON((iter->flags & BTREE_ITER_is_extents) &&
...@@ -3406,8 +3401,10 @@ void bch2_fs_btree_iter_exit(struct bch_fs *c) ...@@ -3406,8 +3401,10 @@ void bch2_fs_btree_iter_exit(struct bch_fs *c)
bch2_time_stats_exit(&s->lock_hold_times); bch2_time_stats_exit(&s->lock_hold_times);
} }
if (c->btree_trans_barrier_initialized) if (c->btree_trans_barrier_initialized) {
synchronize_srcu_expedited(&c->btree_trans_barrier);
cleanup_srcu_struct(&c->btree_trans_barrier); cleanup_srcu_struct(&c->btree_trans_barrier);
}
mempool_exit(&c->btree_trans_mem_pool); mempool_exit(&c->btree_trans_mem_pool);
mempool_exit(&c->btree_trans_pool); mempool_exit(&c->btree_trans_pool);
} }
......
...@@ -32,10 +32,11 @@ static int bch2_btree_key_cache_cmp_fn(struct rhashtable_compare_arg *arg, ...@@ -32,10 +32,11 @@ static int bch2_btree_key_cache_cmp_fn(struct rhashtable_compare_arg *arg,
} }
static const struct rhashtable_params bch2_btree_key_cache_params = { static const struct rhashtable_params bch2_btree_key_cache_params = {
.head_offset = offsetof(struct bkey_cached, hash), .head_offset = offsetof(struct bkey_cached, hash),
.key_offset = offsetof(struct bkey_cached, key), .key_offset = offsetof(struct bkey_cached, key),
.key_len = sizeof(struct bkey_cached_key), .key_len = sizeof(struct bkey_cached_key),
.obj_cmpfn = bch2_btree_key_cache_cmp_fn, .obj_cmpfn = bch2_btree_key_cache_cmp_fn,
.automatic_shrinking = true,
}; };
__flatten __flatten
...@@ -840,7 +841,6 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink, ...@@ -840,7 +841,6 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
six_lock_exit(&ck->c.lock); six_lock_exit(&ck->c.lock);
kmem_cache_free(bch2_key_cache, ck); kmem_cache_free(bch2_key_cache, ck);
atomic_long_dec(&bc->nr_freed); atomic_long_dec(&bc->nr_freed);
freed++;
bc->nr_freed_nonpcpu--; bc->nr_freed_nonpcpu--;
bc->freed++; bc->freed++;
} }
...@@ -854,7 +854,6 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink, ...@@ -854,7 +854,6 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
six_lock_exit(&ck->c.lock); six_lock_exit(&ck->c.lock);
kmem_cache_free(bch2_key_cache, ck); kmem_cache_free(bch2_key_cache, ck);
atomic_long_dec(&bc->nr_freed); atomic_long_dec(&bc->nr_freed);
freed++;
bc->nr_freed_pcpu--; bc->nr_freed_pcpu--;
bc->freed++; bc->freed++;
} }
...@@ -876,23 +875,22 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink, ...@@ -876,23 +875,22 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) { if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
bc->skipped_dirty++; bc->skipped_dirty++;
goto next;
} else if (test_bit(BKEY_CACHED_ACCESSED, &ck->flags)) { } else if (test_bit(BKEY_CACHED_ACCESSED, &ck->flags)) {
clear_bit(BKEY_CACHED_ACCESSED, &ck->flags); clear_bit(BKEY_CACHED_ACCESSED, &ck->flags);
bc->skipped_accessed++; bc->skipped_accessed++;
goto next; } else if (!bkey_cached_lock_for_evict(ck)) {
} else if (bkey_cached_lock_for_evict(ck)) { bc->skipped_lock_fail++;
} else {
bkey_cached_evict(bc, ck); bkey_cached_evict(bc, ck);
bkey_cached_free(bc, ck); bkey_cached_free(bc, ck);
bc->moved_to_freelist++; bc->moved_to_freelist++;
} else { freed++;
bc->skipped_lock_fail++;
} }
scanned++; scanned++;
if (scanned >= nr) if (scanned >= nr)
break; break;
next:
pos = next; pos = next;
} }
...@@ -917,6 +915,14 @@ static unsigned long bch2_btree_key_cache_count(struct shrinker *shrink, ...@@ -917,6 +915,14 @@ static unsigned long bch2_btree_key_cache_count(struct shrinker *shrink,
long nr = atomic_long_read(&bc->nr_keys) - long nr = atomic_long_read(&bc->nr_keys) -
atomic_long_read(&bc->nr_dirty); atomic_long_read(&bc->nr_dirty);
/*
* Avoid hammering our shrinker too much if it's nearly empty - the
* shrinker code doesn't take into account how big our cache is, if it's
* mostly empty but the system is under memory pressure it causes nasty
* lock contention:
*/
nr -= 128;
return max(0L, nr); return max(0L, nr);
} }
...@@ -1025,9 +1031,10 @@ int bch2_fs_btree_key_cache_init(struct btree_key_cache *bc) ...@@ -1025,9 +1031,10 @@ int bch2_fs_btree_key_cache_init(struct btree_key_cache *bc)
if (!shrink) if (!shrink)
return -BCH_ERR_ENOMEM_fs_btree_cache_init; return -BCH_ERR_ENOMEM_fs_btree_cache_init;
bc->shrink = shrink; bc->shrink = shrink;
shrink->seeks = 0;
shrink->count_objects = bch2_btree_key_cache_count; shrink->count_objects = bch2_btree_key_cache_count;
shrink->scan_objects = bch2_btree_key_cache_scan; shrink->scan_objects = bch2_btree_key_cache_scan;
shrink->batch = 1 << 14;
shrink->seeks = 0;
shrink->private_data = c; shrink->private_data = c;
shrinker_register(shrink); shrinker_register(shrink);
return 0; return 0;
......
...@@ -72,10 +72,11 @@ static bool found_btree_node_is_readable(struct btree_trans *trans, ...@@ -72,10 +72,11 @@ static bool found_btree_node_is_readable(struct btree_trans *trans,
struct btree *b = bch2_btree_node_get_noiter(trans, &k.k, f->btree_id, f->level, false); struct btree *b = bch2_btree_node_get_noiter(trans, &k.k, f->btree_id, f->level, false);
bool ret = !IS_ERR_OR_NULL(b); bool ret = !IS_ERR_OR_NULL(b);
if (ret) { if (!ret)
f->sectors_written = b->written; return ret;
six_unlock_read(&b->c.lock);
} f->sectors_written = b->written;
six_unlock_read(&b->c.lock);
/* /*
* We might update this node's range; if that happens, we need the node * We might update this node's range; if that happens, we need the node
......
This diff is collapsed.
...@@ -93,7 +93,8 @@ static inline struct bucket *gc_bucket(struct bch_dev *ca, size_t b) ...@@ -93,7 +93,8 @@ static inline struct bucket *gc_bucket(struct bch_dev *ca, size_t b)
{ {
struct bucket_array *buckets = gc_bucket_array(ca); struct bucket_array *buckets = gc_bucket_array(ca);
BUG_ON(!bucket_valid(ca, b)); if (b - buckets->first_bucket >= buckets->nbuckets_minus_first)
return NULL;
return buckets->b + b; return buckets->b + b;
} }
...@@ -110,7 +111,8 @@ static inline u8 *bucket_gen(struct bch_dev *ca, size_t b) ...@@ -110,7 +111,8 @@ static inline u8 *bucket_gen(struct bch_dev *ca, size_t b)
{ {
struct bucket_gens *gens = bucket_gens(ca); struct bucket_gens *gens = bucket_gens(ca);
BUG_ON(!bucket_valid(ca, b)); if (b - gens->first_bucket >= gens->nbuckets_minus_first)
return NULL;
return gens->b + b; return gens->b + b;
} }
...@@ -170,19 +172,22 @@ static inline int gen_after(u8 a, u8 b) ...@@ -170,19 +172,22 @@ static inline int gen_after(u8 a, u8 b)
return r > 0 ? r : 0; return r > 0 ? r : 0;
} }
static inline u8 dev_ptr_stale_rcu(struct bch_dev *ca, const struct bch_extent_ptr *ptr) static inline int dev_ptr_stale_rcu(struct bch_dev *ca, const struct bch_extent_ptr *ptr)
{ {
return gen_after(*bucket_gen(ca, PTR_BUCKET_NR(ca, ptr)), ptr->gen); u8 *gen = bucket_gen(ca, PTR_BUCKET_NR(ca, ptr));
if (!gen)
return -1;
return gen_after(*gen, ptr->gen);
} }
/** /**
* dev_ptr_stale() - check if a pointer points into a bucket that has been * dev_ptr_stale() - check if a pointer points into a bucket that has been
* invalidated. * invalidated.
*/ */
static inline u8 dev_ptr_stale(struct bch_dev *ca, const struct bch_extent_ptr *ptr) static inline int dev_ptr_stale(struct bch_dev *ca, const struct bch_extent_ptr *ptr)
{ {
rcu_read_lock(); rcu_read_lock();
u8 ret = dev_ptr_stale_rcu(ca, ptr); int ret = dev_ptr_stale_rcu(ca, ptr);
rcu_read_unlock(); rcu_read_unlock();
return ret; return ret;
......
...@@ -22,6 +22,7 @@ struct bucket_array { ...@@ -22,6 +22,7 @@ struct bucket_array {
struct rcu_head rcu; struct rcu_head rcu;
u16 first_bucket; u16 first_bucket;
size_t nbuckets; size_t nbuckets;
size_t nbuckets_minus_first;
struct bucket b[]; struct bucket b[];
}; };
...@@ -29,6 +30,7 @@ struct bucket_gens { ...@@ -29,6 +30,7 @@ struct bucket_gens {
struct rcu_head rcu; struct rcu_head rcu;
u16 first_bucket; u16 first_bucket;
size_t nbuckets; size_t nbuckets;
size_t nbuckets_minus_first;
u8 b[]; u8 b[];
}; };
......
...@@ -202,9 +202,8 @@ static int __bch2_data_update_index_update(struct btree_trans *trans, ...@@ -202,9 +202,8 @@ static int __bch2_data_update_index_update(struct btree_trans *trans,
bch2_bkey_durability(c, bkey_i_to_s_c(&new->k_i)); bch2_bkey_durability(c, bkey_i_to_s_c(&new->k_i));
/* Now, drop excess replicas: */ /* Now, drop excess replicas: */
restart_drop_extra_replicas:
rcu_read_lock(); rcu_read_lock();
restart_drop_extra_replicas:
bkey_for_each_ptr_decode(old.k, bch2_bkey_ptrs(bkey_i_to_s(insert)), p, entry) { bkey_for_each_ptr_decode(old.k, bch2_bkey_ptrs(bkey_i_to_s(insert)), p, entry) {
unsigned ptr_durability = bch2_extent_ptr_durability(c, &p); unsigned ptr_durability = bch2_extent_ptr_durability(c, &p);
......
...@@ -268,6 +268,7 @@ static int mark_stripe_bucket(struct btree_trans *trans, ...@@ -268,6 +268,7 @@ static int mark_stripe_bucket(struct btree_trans *trans,
{ {
struct bch_fs *c = trans->c; struct bch_fs *c = trans->c;
const struct bch_extent_ptr *ptr = s.v->ptrs + ptr_idx; const struct bch_extent_ptr *ptr = s.v->ptrs + ptr_idx;
struct printbuf buf = PRINTBUF;
int ret = 0; int ret = 0;
struct bch_dev *ca = bch2_dev_tryget(c, ptr->dev); struct bch_dev *ca = bch2_dev_tryget(c, ptr->dev);
...@@ -289,6 +290,13 @@ static int mark_stripe_bucket(struct btree_trans *trans, ...@@ -289,6 +290,13 @@ static int mark_stripe_bucket(struct btree_trans *trans,
if (flags & BTREE_TRIGGER_gc) { if (flags & BTREE_TRIGGER_gc) {
percpu_down_read(&c->mark_lock); percpu_down_read(&c->mark_lock);
struct bucket *g = gc_bucket(ca, bucket.offset); struct bucket *g = gc_bucket(ca, bucket.offset);
if (bch2_fs_inconsistent_on(!g, c, "reference to invalid bucket on device %u\n %s",
ptr->dev,
(bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) {
ret = -EIO;
goto err_unlock;
}
bucket_lock(g); bucket_lock(g);
struct bch_alloc_v4 old = bucket_m_to_alloc(*g), new = old; struct bch_alloc_v4 old = bucket_m_to_alloc(*g), new = old;
ret = __mark_stripe_bucket(trans, ca, s, ptr_idx, deleting, bucket, &new, flags); ret = __mark_stripe_bucket(trans, ca, s, ptr_idx, deleting, bucket, &new, flags);
...@@ -297,10 +305,12 @@ static int mark_stripe_bucket(struct btree_trans *trans, ...@@ -297,10 +305,12 @@ static int mark_stripe_bucket(struct btree_trans *trans,
bch2_dev_usage_update(c, ca, &old, &new, 0, true); bch2_dev_usage_update(c, ca, &old, &new, 0, true);
} }
bucket_unlock(g); bucket_unlock(g);
err_unlock:
percpu_up_read(&c->mark_lock); percpu_up_read(&c->mark_lock);
} }
err: err:
bch2_dev_put(ca); bch2_dev_put(ca);
printbuf_exit(&buf);
return ret; return ret;
} }
...@@ -714,10 +724,12 @@ static void ec_block_endio(struct bio *bio) ...@@ -714,10 +724,12 @@ static void ec_block_endio(struct bio *bio)
bch2_blk_status_to_str(bio->bi_status))) bch2_blk_status_to_str(bio->bi_status)))
clear_bit(ec_bio->idx, ec_bio->buf->valid); clear_bit(ec_bio->idx, ec_bio->buf->valid);
if (dev_ptr_stale(ca, ptr)) { int stale = dev_ptr_stale(ca, ptr);
if (stale) {
bch_err_ratelimited(ca->fs, bch_err_ratelimited(ca->fs,
"error %s stripe: stale pointer after io", "error %s stripe: stale/invalid pointer (%i) after io",
bio_data_dir(bio) == READ ? "reading from" : "writing to"); bio_data_dir(bio) == READ ? "reading from" : "writing to",
stale);
clear_bit(ec_bio->idx, ec_bio->buf->valid); clear_bit(ec_bio->idx, ec_bio->buf->valid);
} }
...@@ -743,10 +755,12 @@ static void ec_block_io(struct bch_fs *c, struct ec_stripe_buf *buf, ...@@ -743,10 +755,12 @@ static void ec_block_io(struct bch_fs *c, struct ec_stripe_buf *buf,
return; return;
} }
if (dev_ptr_stale(ca, ptr)) { int stale = dev_ptr_stale(ca, ptr);
if (stale) {
bch_err_ratelimited(c, bch_err_ratelimited(c,
"error %s stripe: stale pointer", "error %s stripe: stale pointer (%i)",
rw == READ ? "reading from" : "writing to"); rw == READ ? "reading from" : "writing to",
stale);
clear_bit(idx, buf->valid); clear_bit(idx, buf->valid);
return; return;
} }
......
...@@ -137,7 +137,7 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k, ...@@ -137,7 +137,7 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k,
struct bch_dev *ca = bch2_dev_rcu(c, p.ptr.dev); struct bch_dev *ca = bch2_dev_rcu(c, p.ptr.dev);
if (p.ptr.cached && (!ca || dev_ptr_stale(ca, &p.ptr))) if (p.ptr.cached && (!ca || dev_ptr_stale_rcu(ca, &p.ptr)))
continue; continue;
f = failed ? dev_io_failures(failed, p.ptr.dev) : NULL; f = failed ? dev_io_failures(failed, p.ptr.dev) : NULL;
...@@ -999,7 +999,7 @@ bool bch2_extent_normalize(struct bch_fs *c, struct bkey_s k) ...@@ -999,7 +999,7 @@ bool bch2_extent_normalize(struct bch_fs *c, struct bkey_s k)
bch2_bkey_drop_ptrs(k, ptr, bch2_bkey_drop_ptrs(k, ptr,
ptr->cached && ptr->cached &&
(ca = bch2_dev_rcu(c, ptr->dev)) && (ca = bch2_dev_rcu(c, ptr->dev)) &&
dev_ptr_stale_rcu(ca, ptr)); dev_ptr_stale_rcu(ca, ptr) > 0);
rcu_read_unlock(); rcu_read_unlock();
return bkey_deleted(k.k); return bkey_deleted(k.k);
...@@ -1024,8 +1024,11 @@ void bch2_extent_ptr_to_text(struct printbuf *out, struct bch_fs *c, const struc ...@@ -1024,8 +1024,11 @@ void bch2_extent_ptr_to_text(struct printbuf *out, struct bch_fs *c, const struc
prt_str(out, " cached"); prt_str(out, " cached");
if (ptr->unwritten) if (ptr->unwritten)
prt_str(out, " unwritten"); prt_str(out, " unwritten");
if (bucket_valid(ca, b) && dev_ptr_stale_rcu(ca, ptr)) int stale = dev_ptr_stale_rcu(ca, ptr);
if (stale > 0)
prt_printf(out, " stale"); prt_printf(out, " stale");
else if (stale)
prt_printf(out, " invalid");
} }
rcu_read_unlock(); rcu_read_unlock();
--out->atomic; --out->atomic;
......
...@@ -308,8 +308,8 @@ static int bch2_ioc_goingdown(struct bch_fs *c, u32 __user *arg) ...@@ -308,8 +308,8 @@ static int bch2_ioc_goingdown(struct bch_fs *c, u32 __user *arg)
return ret; return ret;
} }
static long __bch2_ioctl_subvolume_create(struct bch_fs *c, struct file *filp, static long bch2_ioctl_subvolume_create(struct bch_fs *c, struct file *filp,
struct bch_ioctl_subvolume arg) struct bch_ioctl_subvolume arg)
{ {
struct inode *dir; struct inode *dir;
struct bch_inode_info *inode; struct bch_inode_info *inode;
...@@ -406,9 +406,12 @@ static long __bch2_ioctl_subvolume_create(struct bch_fs *c, struct file *filp, ...@@ -406,9 +406,12 @@ static long __bch2_ioctl_subvolume_create(struct bch_fs *c, struct file *filp,
!arg.src_ptr) !arg.src_ptr)
snapshot_src.subvol = inode_inum(to_bch_ei(dir)).subvol; snapshot_src.subvol = inode_inum(to_bch_ei(dir)).subvol;
down_write(&c->snapshot_create_lock);
inode = __bch2_create(file_mnt_idmap(filp), to_bch_ei(dir), inode = __bch2_create(file_mnt_idmap(filp), to_bch_ei(dir),
dst_dentry, arg.mode|S_IFDIR, dst_dentry, arg.mode|S_IFDIR,
0, snapshot_src, create_flags); 0, snapshot_src, create_flags);
up_write(&c->snapshot_create_lock);
error = PTR_ERR_OR_ZERO(inode); error = PTR_ERR_OR_ZERO(inode);
if (error) if (error)
goto err3; goto err3;
...@@ -429,16 +432,6 @@ static long __bch2_ioctl_subvolume_create(struct bch_fs *c, struct file *filp, ...@@ -429,16 +432,6 @@ static long __bch2_ioctl_subvolume_create(struct bch_fs *c, struct file *filp,
return error; return error;
} }
static long bch2_ioctl_subvolume_create(struct bch_fs *c, struct file *filp,
struct bch_ioctl_subvolume arg)
{
down_write(&c->snapshot_create_lock);
long ret = __bch2_ioctl_subvolume_create(c, filp, arg);
up_write(&c->snapshot_create_lock);
return ret;
}
static long bch2_ioctl_subvolume_destroy(struct bch_fs *c, struct file *filp, static long bch2_ioctl_subvolume_destroy(struct bch_fs *c, struct file *filp,
struct bch_ioctl_subvolume arg) struct bch_ioctl_subvolume arg)
{ {
......
...@@ -227,7 +227,9 @@ static struct bch_inode_info *__bch2_new_inode(struct bch_fs *c) ...@@ -227,7 +227,9 @@ static struct bch_inode_info *__bch2_new_inode(struct bch_fs *c)
mutex_init(&inode->ei_update_lock); mutex_init(&inode->ei_update_lock);
two_state_lock_init(&inode->ei_pagecache_lock); two_state_lock_init(&inode->ei_pagecache_lock);
INIT_LIST_HEAD(&inode->ei_vfs_inode_list); INIT_LIST_HEAD(&inode->ei_vfs_inode_list);
inode->ei_flags = 0;
mutex_init(&inode->ei_quota_lock); mutex_init(&inode->ei_quota_lock);
memset(&inode->ei_devs_need_flush, 0, sizeof(inode->ei_devs_need_flush));
inode->v.i_state = 0; inode->v.i_state = 0;
if (unlikely(inode_init_always(c->vfs_sb, &inode->v))) { if (unlikely(inode_init_always(c->vfs_sb, &inode->v))) {
...@@ -1967,6 +1969,7 @@ static struct dentry *bch2_mount(struct file_system_type *fs_type, ...@@ -1967,6 +1969,7 @@ static struct dentry *bch2_mount(struct file_system_type *fs_type,
sb->s_time_min = div_s64(S64_MIN, c->sb.time_units_per_sec) + 1; sb->s_time_min = div_s64(S64_MIN, c->sb.time_units_per_sec) + 1;
sb->s_time_max = div_s64(S64_MAX, c->sb.time_units_per_sec); sb->s_time_max = div_s64(S64_MAX, c->sb.time_units_per_sec);
sb->s_uuid = c->sb.user_uuid; sb->s_uuid = c->sb.user_uuid;
sb->s_shrink->seeks = 0;
c->vfs_sb = sb; c->vfs_sb = sb;
strscpy(sb->s_id, c->name, sizeof(sb->s_id)); strscpy(sb->s_id, c->name, sizeof(sb->s_id));
......
...@@ -1677,6 +1677,7 @@ static int check_subdir_count(struct btree_trans *trans, struct inode_walker *w) ...@@ -1677,6 +1677,7 @@ static int check_subdir_count(struct btree_trans *trans, struct inode_walker *w)
trans_was_restarted(trans, restart_count); trans_was_restarted(trans, restart_count);
} }
noinline_for_stack
static int check_dirent_inode_dirent(struct btree_trans *trans, static int check_dirent_inode_dirent(struct btree_trans *trans,
struct btree_iter *iter, struct btree_iter *iter,
struct bkey_s_c_dirent d, struct bkey_s_c_dirent d,
...@@ -1773,6 +1774,7 @@ static int check_dirent_inode_dirent(struct btree_trans *trans, ...@@ -1773,6 +1774,7 @@ static int check_dirent_inode_dirent(struct btree_trans *trans,
return ret; return ret;
} }
noinline_for_stack
static int check_dirent_target(struct btree_trans *trans, static int check_dirent_target(struct btree_trans *trans,
struct btree_iter *iter, struct btree_iter *iter,
struct bkey_s_c_dirent d, struct bkey_s_c_dirent d,
...@@ -1847,6 +1849,7 @@ static int find_snapshot_subvol(struct btree_trans *trans, u32 snapshot, u32 *su ...@@ -1847,6 +1849,7 @@ static int find_snapshot_subvol(struct btree_trans *trans, u32 snapshot, u32 *su
return ret; return ret;
} }
noinline_for_stack
static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter *iter, static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter *iter,
struct bkey_s_c_dirent d) struct bkey_s_c_dirent d)
{ {
......
...@@ -84,9 +84,10 @@ struct promote_op { ...@@ -84,9 +84,10 @@ struct promote_op {
}; };
static const struct rhashtable_params bch_promote_params = { static const struct rhashtable_params bch_promote_params = {
.head_offset = offsetof(struct promote_op, hash), .head_offset = offsetof(struct promote_op, hash),
.key_offset = offsetof(struct promote_op, pos), .key_offset = offsetof(struct promote_op, pos),
.key_len = sizeof(struct bpos), .key_len = sizeof(struct bpos),
.automatic_shrinking = true,
}; };
static inline int should_promote(struct bch_fs *c, struct bkey_s_c k, static inline int should_promote(struct bch_fs *c, struct bkey_s_c k,
...@@ -776,18 +777,32 @@ static noinline void read_from_stale_dirty_pointer(struct btree_trans *trans, ...@@ -776,18 +777,32 @@ static noinline void read_from_stale_dirty_pointer(struct btree_trans *trans,
PTR_BUCKET_POS(ca, &ptr), PTR_BUCKET_POS(ca, &ptr),
BTREE_ITER_cached); BTREE_ITER_cached);
prt_printf(&buf, "Attempting to read from stale dirty pointer:\n"); u8 *gen = bucket_gen(ca, iter.pos.offset);
printbuf_indent_add(&buf, 2); if (gen) {
bch2_bkey_val_to_text(&buf, c, k); prt_printf(&buf, "Attempting to read from stale dirty pointer:\n");
prt_newline(&buf); printbuf_indent_add(&buf, 2);
prt_printf(&buf, "memory gen: %u", *bucket_gen(ca, iter.pos.offset)); bch2_bkey_val_to_text(&buf, c, k);
ret = lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_slot(&iter)));
if (!ret) {
prt_newline(&buf); prt_newline(&buf);
prt_printf(&buf, "memory gen: %u", *gen);
ret = lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_slot(&iter)));
if (!ret) {
prt_newline(&buf);
bch2_bkey_val_to_text(&buf, c, k);
}
} else {
prt_printf(&buf, "Attempting to read from invalid bucket %llu:%llu:\n",
iter.pos.inode, iter.pos.offset);
printbuf_indent_add(&buf, 2);
prt_printf(&buf, "first bucket %u nbuckets %llu\n",
ca->mi.first_bucket, ca->mi.nbuckets);
bch2_bkey_val_to_text(&buf, c, k); bch2_bkey_val_to_text(&buf, c, k);
prt_newline(&buf);
} }
bch2_fs_inconsistent(c, "%s", buf.buf); bch2_fs_inconsistent(c, "%s", buf.buf);
......
...@@ -1220,7 +1220,7 @@ static void bch2_nocow_write(struct bch_write_op *op) ...@@ -1220,7 +1220,7 @@ static void bch2_nocow_write(struct bch_write_op *op)
DARRAY_PREALLOCATED(struct bucket_to_lock, 3) buckets; DARRAY_PREALLOCATED(struct bucket_to_lock, 3) buckets;
u32 snapshot; u32 snapshot;
struct bucket_to_lock *stale_at; struct bucket_to_lock *stale_at;
int ret; int stale, ret;
if (op->flags & BCH_WRITE_MOVE) if (op->flags & BCH_WRITE_MOVE)
return; return;
...@@ -1299,7 +1299,8 @@ static void bch2_nocow_write(struct bch_write_op *op) ...@@ -1299,7 +1299,8 @@ static void bch2_nocow_write(struct bch_write_op *op)
BUCKET_NOCOW_LOCK_UPDATE); BUCKET_NOCOW_LOCK_UPDATE);
rcu_read_lock(); rcu_read_lock();
bool stale = gen_after(*bucket_gen(ca, i->b.offset), i->gen); u8 *gen = bucket_gen(ca, i->b.offset);
stale = !gen ? -1 : gen_after(*gen, i->gen);
rcu_read_unlock(); rcu_read_unlock();
if (unlikely(stale)) { if (unlikely(stale)) {
...@@ -1380,8 +1381,18 @@ static void bch2_nocow_write(struct bch_write_op *op) ...@@ -1380,8 +1381,18 @@ static void bch2_nocow_write(struct bch_write_op *op)
break; break;
} }
/* We can retry this: */ struct printbuf buf = PRINTBUF;
ret = -BCH_ERR_transaction_restart; if (bch2_fs_inconsistent_on(stale < 0, c,
"pointer to invalid bucket in nocow path on device %llu\n %s",
stale_at->b.inode,
(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
ret = -EIO;
} else {
/* We can retry this: */
ret = -BCH_ERR_transaction_restart;
}
printbuf_exit(&buf);
goto err_get_ioref; goto err_get_ioref;
} }
......
...@@ -35,9 +35,10 @@ struct buckets_in_flight { ...@@ -35,9 +35,10 @@ struct buckets_in_flight {
}; };
static const struct rhashtable_params bch_move_bucket_params = { static const struct rhashtable_params bch_move_bucket_params = {
.head_offset = offsetof(struct move_bucket_in_flight, hash), .head_offset = offsetof(struct move_bucket_in_flight, hash),
.key_offset = offsetof(struct move_bucket_in_flight, bucket.k), .key_offset = offsetof(struct move_bucket_in_flight, bucket.k),
.key_len = sizeof(struct move_bucket_key), .key_len = sizeof(struct move_bucket_key),
.automatic_shrinking = true,
}; };
static struct move_bucket_in_flight * static struct move_bucket_in_flight *
......
...@@ -1310,15 +1310,15 @@ void bch2_sb_to_text(struct printbuf *out, struct bch_sb *sb, ...@@ -1310,15 +1310,15 @@ void bch2_sb_to_text(struct printbuf *out, struct bch_sb *sb,
prt_printf(out, "Device index:\t%u\n", sb->dev_idx); prt_printf(out, "Device index:\t%u\n", sb->dev_idx);
prt_str(out, "Label:\t"); prt_printf(out, "Label:\t");
prt_printf(out, "%.*s", (int) sizeof(sb->label), sb->label); prt_printf(out, "%.*s", (int) sizeof(sb->label), sb->label);
prt_newline(out); prt_newline(out);
prt_str(out, "Version:\t"); prt_printf(out, "Version:\t");
bch2_version_to_text(out, le16_to_cpu(sb->version)); bch2_version_to_text(out, le16_to_cpu(sb->version));
prt_newline(out); prt_newline(out);
prt_str(out, "Version upgrade complete:\t"); prt_printf(out, "Version upgrade complete:\t");
bch2_version_to_text(out, BCH_SB_VERSION_UPGRADE_COMPLETE(sb)); bch2_version_to_text(out, BCH_SB_VERSION_UPGRADE_COMPLETE(sb));
prt_newline(out); prt_newline(out);
......
...@@ -582,8 +582,10 @@ static void __bch2_fs_free(struct bch_fs *c) ...@@ -582,8 +582,10 @@ static void __bch2_fs_free(struct bch_fs *c)
if (c->write_ref_wq) if (c->write_ref_wq)
destroy_workqueue(c->write_ref_wq); destroy_workqueue(c->write_ref_wq);
if (c->io_complete_wq) if (c->btree_write_submit_wq)
destroy_workqueue(c->io_complete_wq); destroy_workqueue(c->btree_write_submit_wq);
if (c->btree_read_complete_wq)
destroy_workqueue(c->btree_read_complete_wq);
if (c->copygc_wq) if (c->copygc_wq)
destroy_workqueue(c->copygc_wq); destroy_workqueue(c->copygc_wq);
if (c->btree_io_complete_wq) if (c->btree_io_complete_wq)
...@@ -878,8 +880,10 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) ...@@ -878,8 +880,10 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM, 1)) || WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM, 1)) ||
!(c->copygc_wq = alloc_workqueue("bcachefs_copygc", !(c->copygc_wq = alloc_workqueue("bcachefs_copygc",
WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE, 1)) || WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE, 1)) ||
!(c->io_complete_wq = alloc_workqueue("bcachefs_io", !(c->btree_read_complete_wq = alloc_workqueue("bcachefs_btree_read_complete",
WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM, 512)) || WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM, 512)) ||
!(c->btree_write_submit_wq = alloc_workqueue("bcachefs_btree_write_sumit",
WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM, 1)) ||
!(c->write_ref_wq = alloc_workqueue("bcachefs_write_ref", !(c->write_ref_wq = alloc_workqueue("bcachefs_write_ref",
WQ_FREEZABLE, 0)) || WQ_FREEZABLE, 0)) ||
#ifndef BCH_WRITE_REF_DEBUG #ifndef BCH_WRITE_REF_DEBUG
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment