Commit d94189ad authored by Kent Overstreet

bcachefs: Debug mode for c->writes references

This adds a debug mode where we split up the c->writes refcount into
distinct refcounts for every codepath that takes a reference, and adds
sysfs code to print the value of each ref.

This will make it easier to debug shutdown hangs due to refcount leaks.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent dd81a060
...@@ -1113,7 +1113,7 @@ static void bch2_do_discards_work(struct work_struct *work) ...@@ -1113,7 +1113,7 @@ static void bch2_do_discards_work(struct work_struct *work)
if (need_journal_commit * 2 > seen) if (need_journal_commit * 2 > seen)
bch2_journal_flush_async(&c->journal, NULL); bch2_journal_flush_async(&c->journal, NULL);
percpu_ref_put(&c->writes); bch2_write_ref_put(c, BCH_WRITE_REF_discard);
trace_discard_buckets(c, seen, open, need_journal_commit, discarded, trace_discard_buckets(c, seen, open, need_journal_commit, discarded,
bch2_err_str(ret)); bch2_err_str(ret));
...@@ -1121,9 +1121,9 @@ static void bch2_do_discards_work(struct work_struct *work) ...@@ -1121,9 +1121,9 @@ static void bch2_do_discards_work(struct work_struct *work)
void bch2_do_discards(struct bch_fs *c) void bch2_do_discards(struct bch_fs *c)
{ {
if (percpu_ref_tryget_live(&c->writes) && if (bch2_write_ref_tryget(c, BCH_WRITE_REF_discard) &&
!queue_work(system_long_wq, &c->discard_work)) !queue_work(system_long_wq, &c->discard_work))
percpu_ref_put(&c->writes); bch2_write_ref_put(c, BCH_WRITE_REF_discard);
} }
static int invalidate_one_bucket(struct btree_trans *trans, static int invalidate_one_bucket(struct btree_trans *trans,
...@@ -1233,14 +1233,14 @@ static void bch2_do_invalidates_work(struct work_struct *work) ...@@ -1233,14 +1233,14 @@ static void bch2_do_invalidates_work(struct work_struct *work)
} }
bch2_trans_exit(&trans); bch2_trans_exit(&trans);
percpu_ref_put(&c->writes); bch2_write_ref_put(c, BCH_WRITE_REF_invalidate);
} }
void bch2_do_invalidates(struct bch_fs *c) void bch2_do_invalidates(struct bch_fs *c)
{ {
if (percpu_ref_tryget_live(&c->writes) && if (bch2_write_ref_tryget(c, BCH_WRITE_REF_invalidate) &&
!queue_work(system_long_wq, &c->invalidate_work)) !queue_work(system_long_wq, &c->invalidate_work))
percpu_ref_put(&c->writes); bch2_write_ref_put(c, BCH_WRITE_REF_invalidate);
} }
static int bucket_freespace_init(struct btree_trans *trans, struct btree_iter *iter, static int bucket_freespace_init(struct btree_trans *trans, struct btree_iter *iter,
......
...@@ -209,6 +209,10 @@ ...@@ -209,6 +209,10 @@
#include "opts.h" #include "opts.h"
#include "util.h" #include "util.h"
/*
 * BCH_WRITE_REF_DEBUG is defined whenever CONFIG_BCACHEFS_DEBUG is; code
 * elsewhere tests it to choose between the per-codepath write ref counters
 * and the single percpu_ref.
 */
#ifdef CONFIG_BCACHEFS_DEBUG
#define BCH_WRITE_REF_DEBUG
#endif
#define dynamic_fault(...) 0 #define dynamic_fault(...) 0
#define race_fault(...) 0 #define race_fault(...) 0
...@@ -538,6 +542,7 @@ enum { ...@@ -538,6 +542,7 @@ enum {
/* shutdown: */ /* shutdown: */
BCH_FS_STOPPING, BCH_FS_STOPPING,
BCH_FS_EMERGENCY_RO, BCH_FS_EMERGENCY_RO,
BCH_FS_GOING_RO,
BCH_FS_WRITE_DISABLE_COMPLETE, BCH_FS_WRITE_DISABLE_COMPLETE,
BCH_FS_CLEAN_SHUTDOWN, BCH_FS_CLEAN_SHUTDOWN,
...@@ -627,6 +632,29 @@ typedef struct { ...@@ -627,6 +632,29 @@ typedef struct {
#define BCACHEFS_ROOT_SUBVOL_INUM \ #define BCACHEFS_ROOT_SUBVOL_INUM \
((subvol_inum) { BCACHEFS_ROOT_SUBVOL, BCACHEFS_ROOT_INO }) ((subvol_inum) { BCACHEFS_ROOT_SUBVOL, BCACHEFS_ROOT_INO })
/*
 * X-macro listing every named holder of a c->writes reference. Each entry is
 * expanded through a caller-supplied x(name); the order of entries determines
 * the order of anything generated from this list.
 */
#define BCH_WRITE_REFS() \
x(trans) \
x(write) \
x(promote) \
x(node_rewrite) \
x(stripe_create) \
x(stripe_delete) \
x(reflink) \
x(fallocate) \
x(discard) \
x(invalidate) \
x(move) \
x(delete_dead_snapshots) \
x(snapshot_delete_pagecache) \
x(sysfs)
/*
 * One BCH_WRITE_REF_<name> enumerator per BCH_WRITE_REFS() entry, in list
 * order. BCH_WRITE_REF_NR comes last and therefore equals the number of
 * entries.
 */
enum bch_write_ref {
#define x(n) BCH_WRITE_REF_##n,
BCH_WRITE_REFS()
#undef x
BCH_WRITE_REF_NR,
};
struct bch_fs { struct bch_fs {
struct closure cl; struct closure cl;
...@@ -648,7 +676,11 @@ struct bch_fs { ...@@ -648,7 +676,11 @@ struct bch_fs {
struct rw_semaphore state_lock; struct rw_semaphore state_lock;
/* Counts outstanding writes, for clean transition to read-only */ /* Counts outstanding writes, for clean transition to read-only */
#ifdef BCH_WRITE_REF_DEBUG
atomic_long_t writes[BCH_WRITE_REF_NR];
#else
struct percpu_ref writes; struct percpu_ref writes;
#endif
struct work_struct read_only_work; struct work_struct read_only_work;
struct bch_dev __rcu *devs[BCH_SB_MEMBERS_MAX]; struct bch_dev __rcu *devs[BCH_SB_MEMBERS_MAX];
...@@ -965,6 +997,46 @@ mempool_t bio_bounce_pages; ...@@ -965,6 +997,46 @@ mempool_t bio_bounce_pages;
struct btree_transaction_stats btree_transaction_stats[BCH_TRANSACTIONS_NR]; struct btree_transaction_stats btree_transaction_stats[BCH_TRANSACTIONS_NR];
}; };
extern struct wait_queue_head bch2_read_only_wait;
/*
 * Take a write reference unconditionally.
 *
 * Without BCH_WRITE_REF_DEBUG, @ref is ignored and the single percpu_ref in
 * c->writes is taken. With it, the counter in c->writes[] selected by @ref is
 * incremented.
 */
static inline void bch2_write_ref_get(struct bch_fs *c, enum bch_write_ref ref)
{
#ifndef BCH_WRITE_REF_DEBUG
	percpu_ref_get(&c->writes);
#else
	atomic_long_inc(&c->writes[ref]);
#endif
}
/*
 * Try to take a write reference; returns true on success.
 *
 * Without BCH_WRITE_REF_DEBUG this is percpu_ref_tryget_live() on c->writes.
 * With it, the attempt fails when BCH_FS_GOING_RO is set in c->flags, or when
 * the counter selected by @ref is currently zero.
 */
static inline bool bch2_write_ref_tryget(struct bch_fs *c, enum bch_write_ref ref)
{
#ifndef BCH_WRITE_REF_DEBUG
	return percpu_ref_tryget_live(&c->writes);
#else
	/* Checked before touching the counter, as in a short-circuit &&. */
	if (test_bit(BCH_FS_GOING_RO, &c->flags))
		return false;

	return atomic_long_inc_not_zero(&c->writes[ref]);
#endif
}
/*
 * Drop a write reference.
 *
 * Without BCH_WRITE_REF_DEBUG this is percpu_ref_put() on c->writes. With it,
 * the counter selected by @ref is decremented (going negative is a BUG). If
 * that counter reached zero and every counter in c->writes[] now reads zero,
 * BCH_FS_WRITE_DISABLE_COMPLETE is set and bch2_read_only_wait is woken.
 */
static inline void bch2_write_ref_put(struct bch_fs *c, enum bch_write_ref ref)
{
#ifndef BCH_WRITE_REF_DEBUG
	percpu_ref_put(&c->writes);
#else
	long remaining = atomic_long_dec_return(&c->writes[ref]);
	unsigned idx;

	BUG_ON(remaining < 0);

	/* This counter is still held: nothing more to do. */
	if (remaining != 0)
		return;

	/* Any other counter still held means writes are not yet drained. */
	for (idx = 0; idx < BCH_WRITE_REF_NR; idx++) {
		if (atomic_long_read(&c->writes[idx]) != 0)
			return;
	}

	set_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags);
	wake_up(&bch2_read_only_wait);
#endif
}
static inline void bch2_set_ra_pages(struct bch_fs *c, unsigned ra_pages) static inline void bch2_set_ra_pages(struct bch_fs *c, unsigned ra_pages)
{ {
#ifndef NO_BCACHEFS_FS #ifndef NO_BCACHEFS_FS
......
...@@ -2036,7 +2036,7 @@ void async_btree_node_rewrite_work(struct work_struct *work) ...@@ -2036,7 +2036,7 @@ void async_btree_node_rewrite_work(struct work_struct *work)
bch2_trans_do(c, NULL, NULL, 0, bch2_trans_do(c, NULL, NULL, 0,
async_btree_node_rewrite_trans(&trans, a)); async_btree_node_rewrite_trans(&trans, a));
percpu_ref_put(&c->writes); bch2_write_ref_put(c, BCH_WRITE_REF_node_rewrite);
kfree(a); kfree(a);
} }
...@@ -2044,12 +2044,12 @@ void bch2_btree_node_rewrite_async(struct bch_fs *c, struct btree *b) ...@@ -2044,12 +2044,12 @@ void bch2_btree_node_rewrite_async(struct bch_fs *c, struct btree *b)
{ {
struct async_btree_rewrite *a; struct async_btree_rewrite *a;
if (!percpu_ref_tryget_live(&c->writes)) if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_node_rewrite))
return; return;
a = kmalloc(sizeof(*a), GFP_NOFS); a = kmalloc(sizeof(*a), GFP_NOFS);
if (!a) { if (!a) {
percpu_ref_put(&c->writes); bch2_write_ref_put(c, BCH_WRITE_REF_node_rewrite);
return; return;
} }
......
...@@ -994,7 +994,7 @@ bch2_trans_commit_get_rw_cold(struct btree_trans *trans) ...@@ -994,7 +994,7 @@ bch2_trans_commit_get_rw_cold(struct btree_trans *trans)
if (ret) if (ret)
return ret; return ret;
percpu_ref_get(&c->writes); bch2_write_ref_get(c, BCH_WRITE_REF_trans);
return 0; return 0;
} }
...@@ -1043,7 +1043,7 @@ int __bch2_trans_commit(struct btree_trans *trans) ...@@ -1043,7 +1043,7 @@ int __bch2_trans_commit(struct btree_trans *trans)
} }
if (!(trans->flags & BTREE_INSERT_NOCHECK_RW) && if (!(trans->flags & BTREE_INSERT_NOCHECK_RW) &&
unlikely(!percpu_ref_tryget_live(&c->writes))) { unlikely(!bch2_write_ref_tryget(c, BCH_WRITE_REF_trans))) {
ret = bch2_trans_commit_get_rw_cold(trans); ret = bch2_trans_commit_get_rw_cold(trans);
if (ret) if (ret)
goto out_reset; goto out_reset;
...@@ -1114,7 +1114,7 @@ int __bch2_trans_commit(struct btree_trans *trans) ...@@ -1114,7 +1114,7 @@ int __bch2_trans_commit(struct btree_trans *trans)
bch2_journal_preres_put(&c->journal, &trans->journal_preres); bch2_journal_preres_put(&c->journal, &trans->journal_preres);
if (likely(!(trans->flags & BTREE_INSERT_NOCHECK_RW))) if (likely(!(trans->flags & BTREE_INSERT_NOCHECK_RW)))
percpu_ref_put(&c->writes); bch2_write_ref_put(c, BCH_WRITE_REF_trans);
out_reset: out_reset:
bch2_trans_reset_updates(trans); bch2_trans_reset_updates(trans);
......
...@@ -707,14 +707,14 @@ static void ec_stripe_delete_work(struct work_struct *work) ...@@ -707,14 +707,14 @@ static void ec_stripe_delete_work(struct work_struct *work)
break; break;
} }
percpu_ref_put(&c->writes); bch2_write_ref_put(c, BCH_WRITE_REF_stripe_delete);
} }
void bch2_do_stripe_deletes(struct bch_fs *c) void bch2_do_stripe_deletes(struct bch_fs *c)
{ {
if (percpu_ref_tryget_live(&c->writes) && if (bch2_write_ref_tryget(c, BCH_WRITE_REF_stripe_delete) &&
!schedule_work(&c->ec_stripe_delete_work)) !schedule_work(&c->ec_stripe_delete_work))
percpu_ref_put(&c->writes); bch2_write_ref_put(c, BCH_WRITE_REF_stripe_delete);
} }
/* stripe creation: */ /* stripe creation: */
...@@ -922,7 +922,7 @@ static void ec_stripe_create(struct ec_stripe_new *s) ...@@ -922,7 +922,7 @@ static void ec_stripe_create(struct ec_stripe_new *s)
BUG_ON(!s->allocated); BUG_ON(!s->allocated);
if (!percpu_ref_tryget_live(&c->writes)) if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_stripe_create))
goto err; goto err;
ec_generate_ec(&s->new_stripe); ec_generate_ec(&s->new_stripe);
...@@ -964,7 +964,7 @@ static void ec_stripe_create(struct ec_stripe_new *s) ...@@ -964,7 +964,7 @@ static void ec_stripe_create(struct ec_stripe_new *s)
bch2_stripes_heap_insert(c, m, s->new_stripe.key.k.p.offset); bch2_stripes_heap_insert(c, m, s->new_stripe.key.k.p.offset);
spin_unlock(&c->ec_stripes_heap_lock); spin_unlock(&c->ec_stripes_heap_lock);
err_put_writes: err_put_writes:
percpu_ref_put(&c->writes); bch2_write_ref_put(c, BCH_WRITE_REF_stripe_create);
err: err:
bch2_disk_reservation_put(c, &s->res); bch2_disk_reservation_put(c, &s->res);
......
...@@ -3231,7 +3231,7 @@ long bch2_fallocate_dispatch(struct file *file, int mode, ...@@ -3231,7 +3231,7 @@ long bch2_fallocate_dispatch(struct file *file, int mode,
struct bch_fs *c = inode->v.i_sb->s_fs_info; struct bch_fs *c = inode->v.i_sb->s_fs_info;
long ret; long ret;
if (!percpu_ref_tryget_live(&c->writes)) if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_fallocate))
return -EROFS; return -EROFS;
inode_lock(&inode->v); inode_lock(&inode->v);
...@@ -3255,7 +3255,7 @@ long bch2_fallocate_dispatch(struct file *file, int mode, ...@@ -3255,7 +3255,7 @@ long bch2_fallocate_dispatch(struct file *file, int mode,
err: err:
bch2_pagecache_block_put(inode); bch2_pagecache_block_put(inode);
inode_unlock(&inode->v); inode_unlock(&inode->v);
percpu_ref_put(&c->writes); bch2_write_ref_put(c, BCH_WRITE_REF_fallocate);
return bch2_err_class(ret); return bch2_err_class(ret);
} }
......
...@@ -602,7 +602,7 @@ static void bch2_write_done(struct closure *cl) ...@@ -602,7 +602,7 @@ static void bch2_write_done(struct closure *cl)
struct bch_fs *c = op->c; struct bch_fs *c = op->c;
bch2_disk_reservation_put(c, &op->res); bch2_disk_reservation_put(c, &op->res);
percpu_ref_put(&c->writes); bch2_write_ref_put(c, BCH_WRITE_REF_write);
bch2_keylist_free(&op->insert_keys, op->inline_keys); bch2_keylist_free(&op->insert_keys, op->inline_keys);
bch2_time_stats_update(&c->times[BCH_TIME_data_write], op->start_time); bch2_time_stats_update(&c->times[BCH_TIME_data_write], op->start_time);
...@@ -1417,7 +1417,7 @@ void bch2_write(struct closure *cl) ...@@ -1417,7 +1417,7 @@ void bch2_write(struct closure *cl)
} }
if (c->opts.nochanges || if (c->opts.nochanges ||
!percpu_ref_tryget_live(&c->writes)) { !bch2_write_ref_tryget(c, BCH_WRITE_REF_write)) {
op->error = -BCH_ERR_erofs_no_writes; op->error = -BCH_ERR_erofs_no_writes;
goto err; goto err;
} }
...@@ -1496,7 +1496,7 @@ static void promote_free(struct bch_fs *c, struct promote_op *op) ...@@ -1496,7 +1496,7 @@ static void promote_free(struct bch_fs *c, struct promote_op *op)
ret = rhashtable_remove_fast(&c->promote_table, &op->hash, ret = rhashtable_remove_fast(&c->promote_table, &op->hash,
bch_promote_params); bch_promote_params);
BUG_ON(ret); BUG_ON(ret);
percpu_ref_put(&c->writes); bch2_write_ref_put(c, BCH_WRITE_REF_promote);
kfree_rcu(op, rcu); kfree_rcu(op, rcu);
} }
...@@ -1544,7 +1544,7 @@ static struct promote_op *__promote_alloc(struct bch_fs *c, ...@@ -1544,7 +1544,7 @@ static struct promote_op *__promote_alloc(struct bch_fs *c,
unsigned pages = DIV_ROUND_UP(sectors, PAGE_SECTORS); unsigned pages = DIV_ROUND_UP(sectors, PAGE_SECTORS);
int ret; int ret;
if (!percpu_ref_tryget_live(&c->writes)) if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_promote))
return NULL; return NULL;
op = kzalloc(sizeof(*op) + sizeof(struct bio_vec) * pages, GFP_NOIO); op = kzalloc(sizeof(*op) + sizeof(struct bio_vec) * pages, GFP_NOIO);
...@@ -1601,7 +1601,7 @@ static struct promote_op *__promote_alloc(struct bch_fs *c, ...@@ -1601,7 +1601,7 @@ static struct promote_op *__promote_alloc(struct bch_fs *c,
kfree(*rbio); kfree(*rbio);
*rbio = NULL; *rbio = NULL;
kfree(op); kfree(op);
percpu_ref_put(&c->writes); bch2_write_ref_put(c, BCH_WRITE_REF_promote);
return NULL; return NULL;
} }
......
...@@ -57,7 +57,7 @@ static void move_free(struct moving_io *io) ...@@ -57,7 +57,7 @@ static void move_free(struct moving_io *io)
bch2_data_update_exit(&io->write); bch2_data_update_exit(&io->write);
wake_up(&ctxt->wait); wake_up(&ctxt->wait);
percpu_ref_put(&c->writes); bch2_write_ref_put(c, BCH_WRITE_REF_move);
kfree(io); kfree(io);
} }
...@@ -250,7 +250,7 @@ static int bch2_move_extent(struct btree_trans *trans, ...@@ -250,7 +250,7 @@ static int bch2_move_extent(struct btree_trans *trans,
return 0; return 0;
} }
if (!percpu_ref_tryget_live(&c->writes)) if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_move))
return -BCH_ERR_erofs_no_writes; return -BCH_ERR_erofs_no_writes;
/* write path might have to decompress data: */ /* write path might have to decompress data: */
...@@ -319,7 +319,7 @@ static int bch2_move_extent(struct btree_trans *trans, ...@@ -319,7 +319,7 @@ static int bch2_move_extent(struct btree_trans *trans,
err_free: err_free:
kfree(io); kfree(io);
err: err:
percpu_ref_put(&c->writes); bch2_write_ref_put(c, BCH_WRITE_REF_move);
trace_and_count(c, move_extent_alloc_mem_fail, k.k); trace_and_count(c, move_extent_alloc_mem_fail, k.k);
return ret; return ret;
} }
......
...@@ -278,7 +278,7 @@ s64 bch2_remap_range(struct bch_fs *c, ...@@ -278,7 +278,7 @@ s64 bch2_remap_range(struct bch_fs *c,
u32 dst_snapshot, src_snapshot; u32 dst_snapshot, src_snapshot;
int ret = 0, ret2 = 0; int ret = 0, ret2 = 0;
if (!percpu_ref_tryget_live(&c->writes)) if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_reflink))
return -BCH_ERR_erofs_no_writes; return -BCH_ERR_erofs_no_writes;
bch2_check_set_feature(c, BCH_FEATURE_reflink); bch2_check_set_feature(c, BCH_FEATURE_reflink);
...@@ -412,7 +412,7 @@ s64 bch2_remap_range(struct bch_fs *c, ...@@ -412,7 +412,7 @@ s64 bch2_remap_range(struct bch_fs *c,
bch2_bkey_buf_exit(&new_src, c); bch2_bkey_buf_exit(&new_src, c);
bch2_bkey_buf_exit(&new_dst, c); bch2_bkey_buf_exit(&new_dst, c);
percpu_ref_put(&c->writes); bch2_write_ref_put(c, BCH_WRITE_REF_reflink);
return dst_done ?: ret ?: ret2; return dst_done ?: ret ?: ret2;
} }
...@@ -706,16 +706,14 @@ static void bch2_delete_dead_snapshots_work(struct work_struct *work) ...@@ -706,16 +706,14 @@ static void bch2_delete_dead_snapshots_work(struct work_struct *work)
struct bch_fs *c = container_of(work, struct bch_fs, snapshot_delete_work); struct bch_fs *c = container_of(work, struct bch_fs, snapshot_delete_work);
bch2_delete_dead_snapshots(c); bch2_delete_dead_snapshots(c);
percpu_ref_put(&c->writes); bch2_write_ref_put(c, BCH_WRITE_REF_delete_dead_snapshots);
} }
void bch2_delete_dead_snapshots_async(struct bch_fs *c) void bch2_delete_dead_snapshots_async(struct bch_fs *c)
{ {
if (!percpu_ref_tryget_live(&c->writes)) if (bch2_write_ref_tryget(c, BCH_WRITE_REF_delete_dead_snapshots) &&
return; !queue_work(system_long_wq, &c->snapshot_delete_work))
bch2_write_ref_put(c, BCH_WRITE_REF_delete_dead_snapshots);
if (!queue_work(system_long_wq, &c->snapshot_delete_work))
percpu_ref_put(&c->writes);
} }
static int bch2_delete_dead_snapshots_hook(struct btree_trans *trans, static int bch2_delete_dead_snapshots_hook(struct btree_trans *trans,
...@@ -900,7 +898,7 @@ void bch2_subvolume_wait_for_pagecache_and_delete(struct work_struct *work) ...@@ -900,7 +898,7 @@ void bch2_subvolume_wait_for_pagecache_and_delete(struct work_struct *work)
darray_exit(&s); darray_exit(&s);
} }
percpu_ref_put(&c->writes); bch2_write_ref_put(c, BCH_WRITE_REF_snapshot_delete_pagecache);
} }
struct subvolume_unlink_hook { struct subvolume_unlink_hook {
...@@ -923,11 +921,11 @@ int bch2_subvolume_wait_for_pagecache_and_delete_hook(struct btree_trans *trans, ...@@ -923,11 +921,11 @@ int bch2_subvolume_wait_for_pagecache_and_delete_hook(struct btree_trans *trans,
if (ret) if (ret)
return ret; return ret;
if (unlikely(!percpu_ref_tryget_live(&c->writes))) if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_snapshot_delete_pagecache))
return -EROFS; return -EROFS;
if (!queue_work(system_long_wq, &c->snapshot_wait_for_pagecache_and_delete_work)) if (!queue_work(system_long_wq, &c->snapshot_wait_for_pagecache_and_delete_work))
percpu_ref_put(&c->writes); bch2_write_ref_put(c, BCH_WRITE_REF_snapshot_delete_pagecache);
return 0; return 0;
} }
......
...@@ -107,7 +107,7 @@ static struct kset *bcachefs_kset; ...@@ -107,7 +107,7 @@ static struct kset *bcachefs_kset;
static LIST_HEAD(bch_fs_list); static LIST_HEAD(bch_fs_list);
static DEFINE_MUTEX(bch_fs_list_lock); static DEFINE_MUTEX(bch_fs_list_lock);
static DECLARE_WAIT_QUEUE_HEAD(bch_read_only_wait); DECLARE_WAIT_QUEUE_HEAD(bch2_read_only_wait);
static void bch2_dev_free(struct bch_dev *); static void bch2_dev_free(struct bch_dev *);
static int bch2_dev_alloc(struct bch_fs *, unsigned); static int bch2_dev_alloc(struct bch_fs *, unsigned);
...@@ -235,13 +235,15 @@ static void __bch2_fs_read_only(struct bch_fs *c) ...@@ -235,13 +235,15 @@ static void __bch2_fs_read_only(struct bch_fs *c)
bch2_dev_allocator_remove(c, ca); bch2_dev_allocator_remove(c, ca);
} }
#ifndef BCH_WRITE_REF_DEBUG
static void bch2_writes_disabled(struct percpu_ref *writes) static void bch2_writes_disabled(struct percpu_ref *writes)
{ {
struct bch_fs *c = container_of(writes, struct bch_fs, writes); struct bch_fs *c = container_of(writes, struct bch_fs, writes);
set_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags); set_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags);
wake_up(&bch_read_only_wait); wake_up(&bch2_read_only_wait);
} }
#endif
void bch2_fs_read_only(struct bch_fs *c) void bch2_fs_read_only(struct bch_fs *c)
{ {
...@@ -256,7 +258,13 @@ void bch2_fs_read_only(struct bch_fs *c) ...@@ -256,7 +258,13 @@ void bch2_fs_read_only(struct bch_fs *c)
* Block new foreground-end write operations from starting - any new * Block new foreground-end write operations from starting - any new
* writes will return -EROFS: * writes will return -EROFS:
*/ */
set_bit(BCH_FS_GOING_RO, &c->flags);
#ifndef BCH_WRITE_REF_DEBUG
percpu_ref_kill(&c->writes); percpu_ref_kill(&c->writes);
#else
for (unsigned i = 0; i < BCH_WRITE_REF_NR; i++)
bch2_write_ref_put(c, i);
#endif
/* /*
* If we're not doing an emergency shutdown, we want to wait on * If we're not doing an emergency shutdown, we want to wait on
...@@ -269,16 +277,17 @@ void bch2_fs_read_only(struct bch_fs *c) ...@@ -269,16 +277,17 @@ void bch2_fs_read_only(struct bch_fs *c)
* we do need to wait on them before returning and signalling * we do need to wait on them before returning and signalling
* that going RO is complete: * that going RO is complete:
*/ */
wait_event(bch_read_only_wait, wait_event(bch2_read_only_wait,
test_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags) || test_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags) ||
test_bit(BCH_FS_EMERGENCY_RO, &c->flags)); test_bit(BCH_FS_EMERGENCY_RO, &c->flags));
__bch2_fs_read_only(c); __bch2_fs_read_only(c);
wait_event(bch_read_only_wait, wait_event(bch2_read_only_wait,
test_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags)); test_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags));
clear_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags); clear_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags);
clear_bit(BCH_FS_GOING_RO, &c->flags);
if (!bch2_journal_error(&c->journal) && if (!bch2_journal_error(&c->journal) &&
!test_bit(BCH_FS_ERROR, &c->flags) && !test_bit(BCH_FS_ERROR, &c->flags) &&
...@@ -315,7 +324,7 @@ bool bch2_fs_emergency_read_only(struct bch_fs *c) ...@@ -315,7 +324,7 @@ bool bch2_fs_emergency_read_only(struct bch_fs *c)
bch2_journal_halt(&c->journal); bch2_journal_halt(&c->journal);
bch2_fs_read_only_async(c); bch2_fs_read_only_async(c);
wake_up(&bch_read_only_wait); wake_up(&bch2_read_only_wait);
return ret; return ret;
} }
...@@ -395,7 +404,14 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early) ...@@ -395,7 +404,14 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
goto err; goto err;
} }
#ifndef BCH_WRITE_REF_DEBUG
percpu_ref_reinit(&c->writes); percpu_ref_reinit(&c->writes);
#else
for (unsigned i = 0; i < BCH_WRITE_REF_NR; i++) {
BUG_ON(atomic_long_read(&c->writes[i]));
atomic_long_inc(&c->writes[i]);
}
#endif
set_bit(BCH_FS_RW, &c->flags); set_bit(BCH_FS_RW, &c->flags);
set_bit(BCH_FS_WAS_RW, &c->flags); set_bit(BCH_FS_WAS_RW, &c->flags);
...@@ -462,7 +478,9 @@ static void __bch2_fs_free(struct bch_fs *c) ...@@ -462,7 +478,9 @@ static void __bch2_fs_free(struct bch_fs *c)
mempool_exit(&c->btree_bounce_pool); mempool_exit(&c->btree_bounce_pool);
bioset_exit(&c->btree_bio); bioset_exit(&c->btree_bio);
mempool_exit(&c->fill_iter); mempool_exit(&c->fill_iter);
#ifndef BCH_WRITE_REF_DEBUG
percpu_ref_exit(&c->writes); percpu_ref_exit(&c->writes);
#endif
kfree(rcu_dereference_protected(c->disk_groups, 1)); kfree(rcu_dereference_protected(c->disk_groups, 1));
kfree(c->journal_seq_blacklist_table); kfree(c->journal_seq_blacklist_table);
kfree(c->unused_inode_hints); kfree(c->unused_inode_hints);
...@@ -769,8 +787,10 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) ...@@ -769,8 +787,10 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE, 1)) || WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE, 1)) ||
!(c->io_complete_wq = alloc_workqueue("bcachefs_io", !(c->io_complete_wq = alloc_workqueue("bcachefs_io",
WQ_FREEZABLE|WQ_HIGHPRI|WQ_MEM_RECLAIM, 1)) || WQ_FREEZABLE|WQ_HIGHPRI|WQ_MEM_RECLAIM, 1)) ||
#ifndef BCH_WRITE_REF_DEBUG
percpu_ref_init(&c->writes, bch2_writes_disabled, percpu_ref_init(&c->writes, bch2_writes_disabled,
PERCPU_REF_INIT_DEAD, GFP_KERNEL) || PERCPU_REF_INIT_DEAD, GFP_KERNEL) ||
#endif
mempool_init_kmalloc_pool(&c->fill_iter, 1, iter_size) || mempool_init_kmalloc_pool(&c->fill_iter, 1, iter_size) ||
bioset_init(&c->btree_bio, 1, bioset_init(&c->btree_bio, 1,
max(offsetof(struct btree_read_bio, bio), max(offsetof(struct btree_read_bio, bio),
......
...@@ -250,7 +250,8 @@ int bch2_fs_read_write_early(struct bch_fs *); ...@@ -250,7 +250,8 @@ int bch2_fs_read_write_early(struct bch_fs *);
*/ */
static inline void bch2_fs_lazy_rw(struct bch_fs *c) static inline void bch2_fs_lazy_rw(struct bch_fs *c)
{ {
if (percpu_ref_is_zero(&c->writes)) if (!test_bit(BCH_FS_RW, &c->flags) &&
!test_bit(BCH_FS_WAS_RW, &c->flags))
bch2_fs_read_write_early(c); bch2_fs_read_write_early(c);
} }
......
...@@ -195,6 +195,29 @@ read_attribute(stripes_heap); ...@@ -195,6 +195,29 @@ read_attribute(stripes_heap);
read_attribute(open_buckets); read_attribute(open_buckets);
read_attribute(write_points); read_attribute(write_points);
#ifdef BCH_WRITE_REF_DEBUG
read_attribute(write_refs);
/*
 * Stringified names of the BCH_WRITE_REFS() entries, in list order, followed
 * by a NULL entry.
 */
const char * const bch2_write_refs[] = {
#define x(n) #n,
BCH_WRITE_REFS()
#undef x
NULL
};
/*
 * Print one line per entry of c->writes[] to @out: the name from
 * bch2_write_refs[], a tab, then the counter's current value.
 * A tabstop of 24 is pushed onto @out before printing.
 */
static void bch2_write_refs_to_text(struct printbuf *out, struct bch_fs *c)
{
	unsigned ref;

	bch2_printbuf_tabstop_push(out, 24);

	for (ref = 0; ref < ARRAY_SIZE(c->writes); ref++) {
		long count;

		prt_str(out, bch2_write_refs[ref]);
		prt_tab(out);

		count = atomic_long_read(&c->writes[ref]);
		prt_printf(out, "%li", count);
		prt_newline(out);
	}
}
#endif
read_attribute(internal_uuid); read_attribute(internal_uuid);
read_attribute(has_data); read_attribute(has_data);
...@@ -448,6 +471,11 @@ SHOW(bch2_fs) ...@@ -448,6 +471,11 @@ SHOW(bch2_fs)
if (attr == &sysfs_data_jobs) if (attr == &sysfs_data_jobs)
data_progress_to_text(out, c); data_progress_to_text(out, c);
#ifdef BCH_WRITE_REF_DEBUG
if (attr == &sysfs_write_refs)
bch2_write_refs_to_text(out, c);
#endif
return 0; return 0;
} }
...@@ -631,6 +659,9 @@ struct attribute *bch2_fs_internal_files[] = { ...@@ -631,6 +659,9 @@ struct attribute *bch2_fs_internal_files[] = {
&sysfs_stripes_heap, &sysfs_stripes_heap,
&sysfs_open_buckets, &sysfs_open_buckets,
&sysfs_write_points, &sysfs_write_points,
#ifdef BCH_WRITE_REF_DEBUG
&sysfs_write_refs,
#endif
&sysfs_io_timers_read, &sysfs_io_timers_read,
&sysfs_io_timers_write, &sysfs_io_timers_write,
...@@ -682,7 +713,7 @@ STORE(bch2_fs_opts_dir) ...@@ -682,7 +713,7 @@ STORE(bch2_fs_opts_dir)
* We don't need to take c->writes for correctness, but it eliminates an * We don't need to take c->writes for correctness, but it eliminates an
* unsightly error message in the dmesg log when we're RO: * unsightly error message in the dmesg log when we're RO:
*/ */
if (unlikely(!percpu_ref_tryget_live(&c->writes))) if (unlikely(!bch2_write_ref_tryget(c, BCH_WRITE_REF_sysfs)))
return -EROFS; return -EROFS;
tmp = kstrdup(buf, GFP_KERNEL); tmp = kstrdup(buf, GFP_KERNEL);
...@@ -712,7 +743,7 @@ STORE(bch2_fs_opts_dir) ...@@ -712,7 +743,7 @@ STORE(bch2_fs_opts_dir)
ret = size; ret = size;
err: err:
percpu_ref_put(&c->writes); bch2_write_ref_put(c, BCH_WRITE_REF_sysfs);
return ret; return ret;
} }
SYSFS_OPS(bch2_fs_opts_dir); SYSFS_OPS(bch2_fs_opts_dir);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment