Commit 134915f3, authored and committed by Kent Overstreet

bcachefs: Go rw lazily

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent 65e7ab8f
......@@ -345,6 +345,7 @@ int bch2_alloc_replay_key(struct bch_fs *c, struct bkey_i *k)
ret = bch2_trans_commit(&trans, NULL, NULL,
BTREE_INSERT_NOFAIL|
BTREE_INSERT_LAZY_RW|
BTREE_INSERT_JOURNAL_REPLAY|
BTREE_INSERT_NOMARK);
err:
......@@ -1626,7 +1627,7 @@ static bool bch2_fs_allocator_start_fast(struct bch_fs *c)
return ret;
}
static int __bch2_fs_allocator_start(struct bch_fs *c)
int bch2_fs_allocator_start(struct bch_fs *c)
{
struct bch_dev *ca;
unsigned dev_iter;
......@@ -1635,6 +1636,10 @@ static int __bch2_fs_allocator_start(struct bch_fs *c)
long bu;
int ret = 0;
if (!test_alloc_startup(c) &&
bch2_fs_allocator_start_fast(c))
return 0;
pr_debug("not enough empty buckets; scanning for reclaimable buckets");
/*
......@@ -1709,31 +1714,6 @@ static int __bch2_fs_allocator_start(struct bch_fs *c)
return ret;
}
/*
 * Bring up the allocator for the whole filesystem.
 *
 * Tries bch2_fs_allocator_start_fast() first and only falls back to
 * __bch2_fs_allocator_start() when the fast path returns false. On success
 * it sets BCH_FS_ALLOCATOR_STARTED, calls bch2_dev_allocator_start() for
 * every rw member device, and finally sets BCH_FS_ALLOCATOR_RUNNING.
 *
 * Returns 0 on success, otherwise the first nonzero value returned by
 * __bch2_fs_allocator_start() or bch2_dev_allocator_start().
 *
 * NOTE(review): the hunk header just above this definition shrinks the
 * region from 31 to 6 lines, so this looks like the version the commit
 * removes in favour of the renamed function earlier in the diff - confirm.
 */
int bch2_fs_allocator_start(struct bch_fs *c)
{
struct bch_dev *ca;
unsigned i;
int ret;
/* Fast path succeeded => ret = 0; otherwise run the full startup. */
ret = bch2_fs_allocator_start_fast(c) ? 0 :
__bch2_fs_allocator_start(c);
if (ret)
return ret;
set_bit(BCH_FS_ALLOCATOR_STARTED, &c->flags);
for_each_rw_member(ca, c, i) {
ret = bch2_dev_allocator_start(ca);
if (ret) {
/*
 * Leaving the loop early: presumably for_each_rw_member() holds
 * a ref on ca->io_ref for the current device, which has to be
 * dropped by hand here - TODO confirm against the macro.
 */
percpu_ref_put(&ca->io_ref);
return ret;
}
}
/* Only flagged as running once every device allocator has started. */
set_bit(BCH_FS_ALLOCATOR_RUNNING, &c->flags);
return 0;
}
void bch2_fs_allocator_background_init(struct bch_fs *c)
{
spin_lock_init(&c->freelist_lock);
......
......@@ -486,6 +486,7 @@ enum {
BCH_FS_INITIAL_GC_DONE,
BCH_FS_FSCK_DONE,
BCH_FS_STARTED,
BCH_FS_RW,
/* shutdown: */
BCH_FS_EMERGENCY_RO,
......@@ -510,13 +511,6 @@ struct btree_debug {
struct dentry *failed;
};
/*
 * Filesystem lifecycle state, stored in bch_fs.state.
 *
 * BCH_FS_STARTING is explicitly 0. bch2_fs_running() treats BCH_FS_RO and
 * BCH_FS_RW as the "running" states.
 */
enum bch_fs_state {
BCH_FS_STARTING = 0,
BCH_FS_STOPPING,
BCH_FS_RO,
BCH_FS_RW,
};
struct bch_fs_pcpu {
u64 sectors_available;
};
......@@ -538,7 +532,6 @@ struct bch_fs {
/* ro/rw, add/remove devices: */
struct mutex state_lock;
enum bch_fs_state state;
/* Counts outstanding writes, for clean transition to read-only */
struct percpu_ref writes;
......@@ -800,11 +793,6 @@ static inline void bch2_set_ra_pages(struct bch_fs *c, unsigned ra_pages)
#endif
}
/*
 * Returns true when c->state is BCH_FS_RO or BCH_FS_RW, and false for any
 * other state.
 */
static inline bool bch2_fs_running(struct bch_fs *c)
{
return c->state == BCH_FS_RO || c->state == BCH_FS_RW;
}
static inline unsigned bucket_bytes(const struct bch_dev *ca)
{
return ca->mi.bucket_size << 9;
......
......@@ -38,6 +38,7 @@ enum {
__BTREE_INSERT_NOUNLOCK,
__BTREE_INSERT_NOFAIL,
__BTREE_INSERT_NOCHECK_RW,
__BTREE_INSERT_LAZY_RW,
__BTREE_INSERT_USE_RESERVE,
__BTREE_INSERT_USE_ALLOC_RESERVE,
__BTREE_INSERT_JOURNAL_REPLAY,
......@@ -64,6 +65,7 @@ enum {
#define BTREE_INSERT_NOFAIL (1 << __BTREE_INSERT_NOFAIL)
#define BTREE_INSERT_NOCHECK_RW (1 << __BTREE_INSERT_NOCHECK_RW)
#define BTREE_INSERT_LAZY_RW (1 << __BTREE_INSERT_LAZY_RW)
/* for copygc, or when merging btree nodes */
#define BTREE_INSERT_USE_RESERVE (1 << __BTREE_INSERT_USE_RESERVE)
......
......@@ -849,8 +849,23 @@ int bch2_trans_commit(struct btree_trans *trans,
btree_insert_entry_checks(trans, i);
if (unlikely(!(trans->flags & BTREE_INSERT_NOCHECK_RW) &&
!percpu_ref_tryget(&c->writes)))
return -EROFS;
!percpu_ref_tryget(&c->writes))) {
if (likely(!(trans->flags & BTREE_INSERT_LAZY_RW)))
return -EROFS;
btree_trans_unlock(trans);
ret = bch2_fs_read_write_early(c);
if (ret)
return ret;
percpu_ref_get(&c->writes);
if (!btree_trans_relock(trans)) {
ret = -EINTR;
goto err;
}
}
retry:
ret = bch2_trans_journal_preres_get(trans);
if (ret)
......
......@@ -1616,7 +1616,7 @@ static struct bch_fs *__bch2_open_as_blockdevs(const char *dev_name, char * cons
mutex_lock(&c->state_lock);
if (!bch2_fs_running(c)) {
if (!test_bit(BCH_FS_STARTED, &c->flags)) {
mutex_unlock(&c->state_lock);
closure_put(&c->cl);
pr_err("err mounting %s: incomplete filesystem", dev_name);
......@@ -1672,8 +1672,6 @@ static int bch2_remount(struct super_block *sb, int *flags, char *data)
return ret;
if (opts.read_only != c->opts.read_only) {
const char *err = NULL;
mutex_lock(&c->state_lock);
if (opts.read_only) {
......@@ -1681,9 +1679,9 @@ static int bch2_remount(struct super_block *sb, int *flags, char *data)
sb->s_flags |= SB_RDONLY;
} else {
err = bch2_fs_read_write(c);
if (err) {
bch_err(c, "error going rw: %s", err);
ret = bch2_fs_read_write(c);
if (ret) {
bch_err(c, "error going rw: %i", ret);
return -EINVAL;
}
......
......@@ -174,7 +174,8 @@ static int hash_redo_key(const struct bch_hash_desc desc,
bch2_hash_set(trans, desc, &h->info, k_iter->pos.inode,
tmp, BCH_HASH_SET_MUST_CREATE);
ret = bch2_trans_commit(trans, NULL, NULL,
BTREE_INSERT_NOFAIL);
BTREE_INSERT_NOFAIL|
BTREE_INSERT_LAZY_RW);
err:
kfree(tmp);
return ret;
......@@ -204,7 +205,8 @@ static int fsck_hash_delete_at(const struct bch_hash_desc desc,
ret = bch2_hash_delete_at(&trans, desc, info, iter) ?:
bch2_trans_commit(&trans, NULL, NULL,
BTREE_INSERT_ATOMIC|
BTREE_INSERT_NOFAIL);
BTREE_INSERT_NOFAIL|
BTREE_INSERT_LAZY_RW);
err:
if (ret == -EINTR)
goto retry;
......@@ -365,7 +367,9 @@ static int check_dirent_hash(struct btree_trans *trans, struct hash_check *h,
buf, strlen(buf), d->v.d_name, len)) {
bch2_trans_update(trans, BTREE_INSERT_ENTRY(iter, &d->k_i));
ret = bch2_trans_commit(trans, NULL, NULL, BTREE_INSERT_NOFAIL);
ret = bch2_trans_commit(trans, NULL, NULL,
BTREE_INSERT_NOFAIL|
BTREE_INSERT_LAZY_RW);
if (ret)
goto err;
......@@ -630,7 +634,8 @@ static int check_dirents(struct bch_fs *c)
BTREE_INSERT_ENTRY(iter, &n->k_i));
ret = bch2_trans_commit(&trans, NULL, NULL,
BTREE_INSERT_NOFAIL);
BTREE_INSERT_NOFAIL|
BTREE_INSERT_LAZY_RW);
kfree(n);
if (ret)
goto err;
......@@ -1268,7 +1273,8 @@ static int check_inode(struct btree_trans *trans,
bch2_trans_update(trans, BTREE_INSERT_ENTRY(iter, &p.inode.k_i));
ret = bch2_trans_commit(trans, NULL, NULL,
BTREE_INSERT_NOFAIL);
BTREE_INSERT_NOFAIL|
BTREE_INSERT_LAZY_RW);
if (ret && ret != -EINTR)
bch_err(c, "error in fs gc: error %i "
"updating inode", ret);
......
......@@ -1027,8 +1027,6 @@ void bch2_fs_journal_start(struct journal *j)
* only have to go down with the next journal entry we write:
*/
bch2_journal_seq_blacklist_write(j);
queue_delayed_work(c->journal_reclaim_wq, &j->reclaim_work, 0);
}
/* init/exit: */
......
......@@ -861,6 +861,7 @@ static int bch2_extent_replay_key(struct bch_fs *c, struct bkey_i *k)
ret = bch2_trans_commit(&trans, &disk_res, NULL,
BTREE_INSERT_ATOMIC|
BTREE_INSERT_NOFAIL|
BTREE_INSERT_LAZY_RW|
BTREE_INSERT_JOURNAL_REPLAY);
} while ((!ret || ret == -EINTR) &&
bkey_cmp(k->k.p, iter->pos));
......@@ -906,6 +907,7 @@ int bch2_journal_replay(struct bch_fs *c, struct list_head *list)
ret = bch2_btree_insert(c, entry->btree_id, k,
NULL, NULL,
BTREE_INSERT_NOFAIL|
BTREE_INSERT_LAZY_RW|
BTREE_INSERT_JOURNAL_REPLAY|
BTREE_INSERT_NOMARK);
break;
......
......@@ -119,8 +119,13 @@ static int verify_superblock_clean(struct bch_fs *c,
if (mustfix_fsck_err_on(j->seq != clean->journal_seq, c,
"superblock journal seq (%llu) doesn't match journal (%llu) after clean shutdown",
le64_to_cpu(clean->journal_seq),
le64_to_cpu(j->seq)))
bch2_fs_mark_clean(c, false);
le64_to_cpu(j->seq))) {
ret = bch2_fs_mark_dirty(c);
if (ret) {
bch_err(c, "error going rw");
return ret;
}
}
mustfix_fsck_err_on(j->read_clock != clean->read_clock, c,
"superblock read clock doesn't match journal after clean shutdown");
......@@ -331,13 +336,6 @@ int bch2_fs_recovery(struct bch_fs *c)
if (c->opts.noreplay)
goto out;
/*
* Mark dirty before journal replay, fsck:
* XXX: after a clean shutdown, this could be done lazily only when fsck
* finds an error
*/
bch2_fs_mark_clean(c, false);
/*
* bch2_fs_journal_start() can't happen sooner, or btree_gc_finish()
* will give spurious errors about oldest_gen > bucket_gen -
......@@ -345,11 +343,6 @@ int bch2_fs_recovery(struct bch_fs *c)
*/
bch2_fs_journal_start(&c->journal);
err = "error starting allocator";
ret = bch2_fs_allocator_start(c);
if (ret)
goto err;
bch_verbose(c, "starting journal replay:");
err = "journal replay failed";
ret = bch2_journal_replay(c, &journal);
......@@ -436,8 +429,8 @@ int bch2_fs_initialize(struct bch_fs *c)
bch2_fs_journal_start(&c->journal);
bch2_journal_set_replay_done(&c->journal);
err = "error starting allocator";
ret = bch2_fs_allocator_start(c);
err = "error going read write";
ret = bch2_fs_read_write_early(c);
if (ret)
goto err;
......
......@@ -886,7 +886,7 @@ void bch2_sb_clean_renumber(struct bch_sb_field_clean *clean, int write)
bch2_bkey_renumber(BKEY_TYPE_BTREE, bkey_to_packed(entry->start), write);
}
static void bch2_fs_mark_dirty(struct bch_fs *c)
int bch2_fs_mark_dirty(struct bch_fs *c)
{
mutex_lock(&c->sb_lock);
if (BCH_SB_CLEAN(c->disk_sb.sb) ||
......@@ -896,6 +896,8 @@ static void bch2_fs_mark_dirty(struct bch_fs *c)
bch2_write_super(c);
}
mutex_unlock(&c->sb_lock);
return 0;
}
struct jset_entry *
......@@ -997,17 +999,12 @@ bch2_journal_super_entries_add_common(struct bch_fs *c,
return entry;
}
void bch2_fs_mark_clean(struct bch_fs *c, bool clean)
void bch2_fs_mark_clean(struct bch_fs *c)
{
struct bch_sb_field_clean *sb_clean;
struct jset_entry *entry;
unsigned u64s;
if (!clean) {
bch2_fs_mark_dirty(c);
return;
}
mutex_lock(&c->sb_lock);
if (BCH_SB_CLEAN(c->disk_sb.sb))
goto out;
......
......@@ -141,7 +141,8 @@ bch2_journal_super_entries_add_common(struct bch_fs *,
void bch2_sb_clean_renumber(struct bch_sb_field_clean *, int);
void bch2_fs_mark_clean(struct bch_fs *, bool);
int bch2_fs_mark_dirty(struct bch_fs *);
void bch2_fs_mark_clean(struct bch_fs *);
void bch2_sb_field_to_text(struct printbuf *, struct bch_sb *,
struct bch_sb_field *);
......
......@@ -258,8 +258,10 @@ static void bch2_writes_disabled(struct percpu_ref *writes)
void bch2_fs_read_only(struct bch_fs *c)
{
if (c->state == BCH_FS_RO)
if (!test_bit(BCH_FS_RW, &c->flags)) {
cancel_delayed_work_sync(&c->journal.reclaim_work);
return;
}
BUG_ON(test_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags));
......@@ -301,10 +303,9 @@ void bch2_fs_read_only(struct bch_fs *c)
!test_bit(BCH_FS_ERROR, &c->flags) &&
!test_bit(BCH_FS_EMERGENCY_RO, &c->flags) &&
test_bit(BCH_FS_STARTED, &c->flags))
bch2_fs_mark_clean(c, true);
bch2_fs_mark_clean(c);
if (c->state != BCH_FS_STOPPING)
c->state = BCH_FS_RO;
clear_bit(BCH_FS_RW, &c->flags);
}
static void bch2_fs_read_only_work(struct work_struct *work)
......@@ -333,55 +334,106 @@ bool bch2_fs_emergency_read_only(struct bch_fs *c)
return ret;
}
const char *bch2_fs_read_write(struct bch_fs *c)
static int bch2_fs_read_write_late(struct bch_fs *c)
{
struct bch_dev *ca;
const char *err = NULL;
unsigned i;
int ret;
if (c->state == BCH_FS_RW)
return NULL;
ret = bch2_gc_thread_start(c);
if (ret) {
bch_err(c, "error starting gc thread");
return ret;
}
for_each_rw_member(ca, c, i) {
ret = bch2_copygc_start(c, ca);
if (ret) {
bch_err(c, "error starting copygc threads");
percpu_ref_put(&ca->io_ref);
return ret;
}
}
ret = bch2_rebalance_start(c);
if (ret) {
bch_err(c, "error starting rebalance thread");
return ret;
}
schedule_delayed_work(&c->pd_controllers_update, 5 * HZ);
return 0;
}
static int __bch2_fs_read_write(struct bch_fs *c, bool early)
{
struct bch_dev *ca;
unsigned i;
int ret;
if (test_bit(BCH_FS_RW, &c->flags))
return 0;
bch2_fs_mark_clean(c, false);
ret = bch2_fs_mark_dirty(c);
if (ret)
goto err;
for_each_rw_member(ca, c, i)
bch2_dev_allocator_add(c, ca);
bch2_recalc_capacity(c);
err = "error starting allocator thread";
for_each_rw_member(ca, c, i)
if (bch2_dev_allocator_start(ca)) {
percpu_ref_put(&ca->io_ref);
if (!test_bit(BCH_FS_ALLOCATOR_STARTED, &c->flags)) {
ret = bch2_fs_allocator_start(c);
if (ret) {
bch_err(c, "error initializing allocator");
goto err;
}
set_bit(BCH_FS_ALLOCATOR_RUNNING, &c->flags);
err = "error starting btree GC thread";
if (bch2_gc_thread_start(c))
goto err;
set_bit(BCH_FS_ALLOCATOR_STARTED, &c->flags);
}
err = "error starting copygc thread";
for_each_rw_member(ca, c, i)
if (bch2_copygc_start(c, ca)) {
for_each_rw_member(ca, c, i) {
ret = bch2_dev_allocator_start(ca);
if (ret) {
bch_err(c, "error starting allocator threads");
percpu_ref_put(&ca->io_ref);
goto err;
}
}
err = "error starting rebalance thread";
if (bch2_rebalance_start(c))
goto err;
set_bit(BCH_FS_ALLOCATOR_RUNNING, &c->flags);
schedule_delayed_work(&c->pd_controllers_update, 5 * HZ);
if (!early) {
ret = bch2_fs_read_write_late(c);
if (ret)
goto err;
}
if (c->state != BCH_FS_STARTING)
percpu_ref_reinit(&c->writes);
percpu_ref_reinit(&c->writes);
set_bit(BCH_FS_RW, &c->flags);
c->state = BCH_FS_RW;
return NULL;
queue_delayed_work(c->journal_reclaim_wq,
&c->journal.reclaim_work, 0);
return 0;
err:
__bch2_fs_read_only(c);
return err;
return ret;
}
/*
 * Take the filesystem read-write via the full (non-early) path.
 *
 * Thin wrapper: calls __bch2_fs_read_write() with early == false, which
 * makes that function also run bch2_fs_read_write_late().
 *
 * Returns whatever __bch2_fs_read_write() returns.
 */
int bch2_fs_read_write(struct bch_fs *c)
{
return __bch2_fs_read_write(c, false);
}
/*
 * Take the filesystem read-write via the early path.
 *
 * Asserts (via lockdep) that the caller holds c->state_lock. Refuses with
 * -EROFS when the read_only option is set; otherwise calls
 * __bch2_fs_read_write() with early == true, which makes that function skip
 * bch2_fs_read_write_late().
 *
 * Returns -EROFS, or whatever __bch2_fs_read_write() returns.
 */
int bch2_fs_read_write_early(struct bch_fs *c)
{
	lockdep_assert_held(&c->state_lock);

	return c->opts.read_only
		? -EROFS
		: __bch2_fs_read_write(c, true);
}
/* Filesystem startup/shutdown: */
......@@ -638,7 +690,8 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_HIGHPRI, 1)) ||
!(c->journal_reclaim_wq = alloc_workqueue("bcache_journal",
WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_HIGHPRI, 1)) ||
percpu_ref_init(&c->writes, bch2_writes_disabled, 0, GFP_KERNEL) ||
percpu_ref_init(&c->writes, bch2_writes_disabled,
PERCPU_REF_INIT_DEAD, GFP_KERNEL) ||
mempool_init_kmalloc_pool(&c->btree_reserve_pool, 1,
sizeof(struct btree_reserve)) ||
mempool_init_kmalloc_pool(&c->btree_interior_update_pool, 1,
......@@ -712,7 +765,7 @@ const char *bch2_fs_start(struct bch_fs *c)
mutex_lock(&c->state_lock);
BUG_ON(c->state != BCH_FS_STARTING);
BUG_ON(test_bit(BCH_FS_STARTED, &c->flags));
mutex_lock(&c->sb_lock);
......@@ -746,9 +799,12 @@ const char *bch2_fs_start(struct bch_fs *c)
if (c->opts.read_only) {
bch2_fs_read_only(c);
} else {
err = bch2_fs_read_write(c);
if (err)
if (!test_bit(BCH_FS_RW, &c->flags)
? bch2_fs_read_write(c)
: bch2_fs_read_write_late(c)) {
err = "error going read write";
goto err;
}
}
set_bit(BCH_FS_STARTED, &c->flags);
......
......@@ -217,7 +217,9 @@ struct bch_dev *bch2_dev_lookup(struct bch_fs *, const char *);
bool bch2_fs_emergency_read_only(struct bch_fs *);
void bch2_fs_read_only(struct bch_fs *);
const char *bch2_fs_read_write(struct bch_fs *);
int bch2_fs_read_write(struct bch_fs *);
int bch2_fs_read_write_early(struct bch_fs *);
void bch2_fs_stop(struct bch_fs *);
......
......@@ -289,7 +289,7 @@ static ssize_t bch2_compression_stats(struct bch_fs *c, char *buf)
compressed_sectors_compressed = 0,
compressed_sectors_uncompressed = 0;
if (!bch2_fs_running(c))
if (!test_bit(BCH_FS_STARTED, &c->flags))
return -EPERM;
for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS_MIN, 0, k)
......@@ -482,7 +482,7 @@ STORE(__bch2_fs)
BCH_DEBUG_PARAMS()
#undef BCH_DEBUG_PARAM
if (!bch2_fs_running(c))
if (!test_bit(BCH_FS_STARTED, &c->flags))
return -EPERM;
/* Debugging: */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment