Commit 932aa837, authored by Kent Overstreet, committed by Kent Overstreet

bcachefs: bch2_trans_mark_update()

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent c43a6ef9
...@@ -141,8 +141,8 @@ struct bkey_alloc_unpacked bch2_alloc_unpack(const struct bch_alloc *a) ...@@ -141,8 +141,8 @@ struct bkey_alloc_unpacked bch2_alloc_unpack(const struct bch_alloc *a)
return ret; return ret;
} }
static void bch2_alloc_pack(struct bkey_i_alloc *dst, void bch2_alloc_pack(struct bkey_i_alloc *dst,
const struct bkey_alloc_unpacked src) const struct bkey_alloc_unpacked src)
{ {
unsigned idx = 0; unsigned idx = 0;
void *d = dst->v.data; void *d = dst->v.data;
...@@ -962,7 +962,6 @@ static int bch2_invalidate_one_bucket2(struct btree_trans *trans, ...@@ -962,7 +962,6 @@ static int bch2_invalidate_one_bucket2(struct btree_trans *trans,
invalidating_cached_data = m.cached_sectors != 0; invalidating_cached_data = m.cached_sectors != 0;
//BUG_ON(u.dirty_sectors);
u.data_type = 0; u.data_type = 0;
u.dirty_sectors = 0; u.dirty_sectors = 0;
u.cached_sectors = 0; u.cached_sectors = 0;
...@@ -974,6 +973,7 @@ static int bch2_invalidate_one_bucket2(struct btree_trans *trans, ...@@ -974,6 +973,7 @@ static int bch2_invalidate_one_bucket2(struct btree_trans *trans,
* we have to trust the in memory bucket @m, not the version in the * we have to trust the in memory bucket @m, not the version in the
* btree: * btree:
*/ */
//BUG_ON(u.dirty_sectors);
u.gen = m.gen + 1; u.gen = m.gen + 1;
a = bkey_alloc_init(&alloc_key.k); a = bkey_alloc_init(&alloc_key.k);
......
...@@ -14,6 +14,8 @@ struct bkey_alloc_unpacked { ...@@ -14,6 +14,8 @@ struct bkey_alloc_unpacked {
}; };
struct bkey_alloc_unpacked bch2_alloc_unpack(const struct bch_alloc *); struct bkey_alloc_unpacked bch2_alloc_unpack(const struct bch_alloc *);
void bch2_alloc_pack(struct bkey_i_alloc *,
const struct bkey_alloc_unpacked);
#define ALLOC_SCAN_BATCH(ca) max_t(size_t, 1, (ca)->mi.nbuckets >> 9) #define ALLOC_SCAN_BATCH(ca) max_t(size_t, 1, (ca)->mi.nbuckets >> 9)
......
...@@ -1300,6 +1300,7 @@ enum bch_sb_features { ...@@ -1300,6 +1300,7 @@ enum bch_sb_features {
enum bch_sb_compat { enum bch_sb_compat {
BCH_COMPAT_FEAT_ALLOC_INFO = 0, BCH_COMPAT_FEAT_ALLOC_INFO = 0,
BCH_COMPAT_FEAT_ALLOC_METADATA = 1,
}; };
/* options: */ /* options: */
......
...@@ -1005,7 +1005,7 @@ static int __btree_iter_traverse_all(struct btree_trans *trans, ...@@ -1005,7 +1005,7 @@ static int __btree_iter_traverse_all(struct btree_trans *trans,
goto retry_all; goto retry_all;
} }
ret = btree_trans_has_multiple_iters(trans) ? -EINTR : 0; ret = hweight64(trans->iters_live) > 1 ? -EINTR : 0;
out: out:
bch2_btree_cache_cannibalize_unlock(c); bch2_btree_cache_cannibalize_unlock(c);
return ret; return ret;
...@@ -1103,8 +1103,6 @@ int __must_check bch2_btree_iter_traverse(struct btree_iter *iter) ...@@ -1103,8 +1103,6 @@ int __must_check bch2_btree_iter_traverse(struct btree_iter *iter)
if (unlikely(ret)) if (unlikely(ret))
ret = __btree_iter_traverse_all(iter->trans, iter, ret); ret = __btree_iter_traverse_all(iter->trans, iter, ret);
BUG_ON(ret == -EINTR && !btree_trans_has_multiple_iters(iter->trans));
return ret; return ret;
} }
......
...@@ -6,6 +6,7 @@ ...@@ -6,6 +6,7 @@
#include <linux/rhashtable.h> #include <linux/rhashtable.h>
#include "bkey_methods.h" #include "bkey_methods.h"
#include "buckets_types.h"
#include "journal_types.h" #include "journal_types.h"
#include "six.h" #include "six.h"
...@@ -264,6 +265,7 @@ struct btree_insert_entry { ...@@ -264,6 +265,7 @@ struct btree_insert_entry {
}; };
bool deferred; bool deferred;
bool triggered;
}; };
#define BTREE_ITER_MAX 64 #define BTREE_ITER_MAX 64
...@@ -302,6 +304,8 @@ struct btree_trans { ...@@ -302,6 +304,8 @@ struct btree_trans {
struct btree_iter iters_onstack[2]; struct btree_iter iters_onstack[2];
struct btree_insert_entry updates_onstack[6]; struct btree_insert_entry updates_onstack[6];
struct replicas_delta_list fs_usage_deltas;
}; };
#define BTREE_FLAG(flag) \ #define BTREE_FLAG(flag) \
......
...@@ -43,8 +43,11 @@ enum { ...@@ -43,8 +43,11 @@ enum {
__BTREE_INSERT_USE_ALLOC_RESERVE, __BTREE_INSERT_USE_ALLOC_RESERVE,
__BTREE_INSERT_JOURNAL_REPLAY, __BTREE_INSERT_JOURNAL_REPLAY,
__BTREE_INSERT_JOURNAL_RESERVED, __BTREE_INSERT_JOURNAL_RESERVED,
__BTREE_INSERT_NOMARK_INSERT,
__BTREE_INSERT_NOMARK_OVERWRITES, __BTREE_INSERT_NOMARK_OVERWRITES,
__BTREE_INSERT_NOMARK, __BTREE_INSERT_NOMARK,
__BTREE_INSERT_MARK_INMEM,
__BTREE_INSERT_NO_CLEAR_REPLICAS,
__BTREE_INSERT_NOWAIT, __BTREE_INSERT_NOWAIT,
__BTREE_INSERT_GC_LOCK_HELD, __BTREE_INSERT_GC_LOCK_HELD,
__BCH_HASH_SET_MUST_CREATE, __BCH_HASH_SET_MUST_CREATE,
...@@ -77,12 +80,20 @@ enum { ...@@ -77,12 +80,20 @@ enum {
#define BTREE_INSERT_JOURNAL_RESERVED (1 << __BTREE_INSERT_JOURNAL_RESERVED) #define BTREE_INSERT_JOURNAL_RESERVED (1 << __BTREE_INSERT_JOURNAL_RESERVED)
/* Don't mark new key, just overwrites: */
#define BTREE_INSERT_NOMARK_INSERT (1 << __BTREE_INSERT_NOMARK_INSERT)
/* Don't mark overwrites, just new key: */ /* Don't mark overwrites, just new key: */
#define BTREE_INSERT_NOMARK_OVERWRITES (1 << __BTREE_INSERT_NOMARK_OVERWRITES) #define BTREE_INSERT_NOMARK_OVERWRITES (1 << __BTREE_INSERT_NOMARK_OVERWRITES)
/* Don't call bch2_mark_key: */ /* Don't call mark new key at all: */
#define BTREE_INSERT_NOMARK (1 << __BTREE_INSERT_NOMARK) #define BTREE_INSERT_NOMARK (1 << __BTREE_INSERT_NOMARK)
/* Don't mark transactionally: */
#define BTREE_INSERT_MARK_INMEM (1 << __BTREE_INSERT_MARK_INMEM)
#define BTREE_INSERT_NO_CLEAR_REPLICAS (1 << __BTREE_INSERT_NO_CLEAR_REPLICAS)
/* Don't block on allocation failure (for new btree nodes: */ /* Don't block on allocation failure (for new btree nodes: */
#define BTREE_INSERT_NOWAIT (1 << __BTREE_INSERT_NOWAIT) #define BTREE_INSERT_NOWAIT (1 << __BTREE_INSERT_NOWAIT)
#define BTREE_INSERT_GC_LOCK_HELD (1 << __BTREE_INSERT_GC_LOCK_HELD) #define BTREE_INSERT_GC_LOCK_HELD (1 << __BTREE_INSERT_GC_LOCK_HELD)
......
...@@ -526,6 +526,22 @@ static inline void do_btree_insert_one(struct btree_trans *trans, ...@@ -526,6 +526,22 @@ static inline void do_btree_insert_one(struct btree_trans *trans,
btree_insert_key_deferred(trans, insert); btree_insert_key_deferred(trans, insert);
} }
/*
 * Decide whether the triggers for update @i are run transactionally.
 *
 * Returns true only when BTREE_INSERT_MARK_INMEM is not set in @trans->flags
 * and the update's iterator is on the extents or inodes btree.
 * do_btree_insert_at() calls bch2_trans_mark_update() for such updates and
 * bch2_mark_update() for the remaining updates that have triggers.
 *
 * @i->iter is dereferenced unconditionally once the flag check has passed.
 */
static inline bool update_triggers_transactional(struct btree_trans *trans,
						 struct btree_insert_entry *i)
{
	/* Expected case: the caller did not request in-memory-only marking. */
	bool transactional = likely(!(trans->flags & BTREE_INSERT_MARK_INMEM));

	if (!transactional)
		return false;

	if (i->iter->btree_id == BTREE_ID_EXTENTS)
		return true;

	return i->iter->btree_id == BTREE_ID_INODES;
}
/*
 * Decide whether update @i has any marking triggers to run.
 *
 * Returns false when BTREE_INSERT_NOMARK is set in @trans->flags or when the
 * update is deferred; otherwise returns the result of
 * btree_node_type_needs_gc() for the iterator's btree.
 *
 * The checks run in this order on purpose: @i->iter is only dereferenced
 * after @i->deferred has been found to be false.
 */
static inline bool update_has_triggers(struct btree_trans *trans,
				       struct btree_insert_entry *i)
{
	/* Expected case: marking has not been disabled for this commit. */
	bool marking_enabled = likely(!(trans->flags & BTREE_INSERT_NOMARK));

	if (!marking_enabled)
		return false;

	if (i->deferred)
		return false;

	return btree_node_type_needs_gc(i->iter->btree_id);
}
/* /*
* Get journal reservation, take write locks, and attempt to do btree update(s): * Get journal reservation, take write locks, and attempt to do btree update(s):
*/ */
...@@ -538,29 +554,25 @@ static inline int do_btree_insert_at(struct btree_trans *trans, ...@@ -538,29 +554,25 @@ static inline int do_btree_insert_at(struct btree_trans *trans,
struct btree_iter *linked; struct btree_iter *linked;
int ret; int ret;
if (likely(!(trans->flags & BTREE_INSERT_NO_CLEAR_REPLICAS))) {
memset(&trans->fs_usage_deltas.fs_usage, 0,
sizeof(trans->fs_usage_deltas.fs_usage));
trans->fs_usage_deltas.top = trans->fs_usage_deltas.d;
}
trans_for_each_update_iter(trans, i) trans_for_each_update_iter(trans, i)
BUG_ON(i->iter->uptodate >= BTREE_ITER_NEED_RELOCK); BUG_ON(i->iter->uptodate >= BTREE_ITER_NEED_RELOCK);
btree_trans_lock_write(c, trans); trans_for_each_update_iter(trans, i)
if (update_has_triggers(trans, i) &&
if (likely(!(trans->flags & BTREE_INSERT_NOMARK))) { update_triggers_transactional(trans, i)) {
trans_for_each_update_iter(trans, i) { ret = bch2_trans_mark_update(trans, i,
if (i->deferred || &trans->fs_usage_deltas);
!btree_node_type_needs_gc(i->iter->btree_id)) if (ret)
continue; return ret;
if (!fs_usage) {
percpu_down_read(&c->mark_lock);
fs_usage = bch2_fs_usage_scratch_get(c);
}
if (!bch2_bkey_replicas_marked_locked(c,
bkey_i_to_s_c(i->k), true)) {
ret = BTREE_INSERT_NEED_MARK_REPLICAS;
goto out;
}
} }
}
btree_trans_lock_write(c, trans);
if (race_fault()) { if (race_fault()) {
ret = -EINTR; ret = -EINTR;
...@@ -578,6 +590,23 @@ static inline int do_btree_insert_at(struct btree_trans *trans, ...@@ -578,6 +590,23 @@ static inline int do_btree_insert_at(struct btree_trans *trans,
if (ret) if (ret)
goto out; goto out;
trans_for_each_update_iter(trans, i) {
if (i->deferred ||
!btree_node_type_needs_gc(i->iter->btree_id))
continue;
if (!fs_usage) {
percpu_down_read(&c->mark_lock);
fs_usage = bch2_fs_usage_scratch_get(c);
}
if (!bch2_bkey_replicas_marked_locked(c,
bkey_i_to_s_c(i->k), true)) {
ret = BTREE_INSERT_NEED_MARK_REPLICAS;
goto out;
}
}
/* /*
* Don't get journal reservation until after we know insert will * Don't get journal reservation until after we know insert will
* succeed: * succeed:
...@@ -606,20 +635,24 @@ static inline int do_btree_insert_at(struct btree_trans *trans, ...@@ -606,20 +635,24 @@ static inline int do_btree_insert_at(struct btree_trans *trans,
linked->flags |= BTREE_ITER_NOUNLOCK; linked->flags |= BTREE_ITER_NOUNLOCK;
} }
if (likely(!(trans->flags & BTREE_INSERT_NOMARK))) { trans_for_each_update_iter(trans, i)
trans_for_each_update_iter(trans, i) if (update_has_triggers(trans, i) &&
!update_triggers_transactional(trans, i))
bch2_mark_update(trans, i, &fs_usage->u, 0); bch2_mark_update(trans, i, &fs_usage->u, 0);
if (fs_usage)
bch2_trans_fs_usage_apply(trans, fs_usage); if (fs_usage) {
bch2_replicas_delta_list_apply(c, &fs_usage->u,
if (unlikely(c->gc_pos.phase)) { &trans->fs_usage_deltas);
trans_for_each_update_iter(trans, i) bch2_trans_fs_usage_apply(trans, fs_usage);
if (gc_visited(c, gc_pos_btree_node(i->iter->l[0].b)))
bch2_mark_update(trans, i, NULL,
BCH_BUCKET_MARK_GC);
}
} }
if (likely(!(trans->flags & BTREE_INSERT_NOMARK)) &&
unlikely(c->gc_pos.phase))
trans_for_each_update_iter(trans, i)
if (gc_visited(c, gc_pos_btree_node(i->iter->l[0].b)))
bch2_mark_update(trans, i, NULL,
BCH_BUCKET_MARK_GC);
trans_for_each_update(trans, i) trans_for_each_update(trans, i)
do_btree_insert_one(trans, i); do_btree_insert_one(trans, i);
out: out:
...@@ -646,6 +679,19 @@ int bch2_trans_commit_error(struct btree_trans *trans, ...@@ -646,6 +679,19 @@ int bch2_trans_commit_error(struct btree_trans *trans,
{ {
struct bch_fs *c = trans->c; struct bch_fs *c = trans->c;
unsigned flags = trans->flags; unsigned flags = trans->flags;
struct btree_insert_entry *src, *dst;
src = dst = trans->updates;
while (src < trans->updates + trans->nr_updates) {
if (!src->triggered) {
*dst = *src;
dst++;
}
src++;
}
trans->nr_updates = dst - trans->updates;
/* /*
* BTREE_INSERT_NOUNLOCK means don't unlock _after_ successful btree * BTREE_INSERT_NOUNLOCK means don't unlock _after_ successful btree
...@@ -808,6 +854,7 @@ int bch2_trans_commit(struct btree_trans *trans, ...@@ -808,6 +854,7 @@ int bch2_trans_commit(struct btree_trans *trans,
{ {
struct bch_fs *c = trans->c; struct bch_fs *c = trans->c;
struct btree_insert_entry *i; struct btree_insert_entry *i;
unsigned orig_mem_top = trans->mem_top;
int ret = 0; int ret = 0;
if (!trans->nr_updates) if (!trans->nr_updates)
...@@ -885,8 +932,16 @@ int bch2_trans_commit(struct btree_trans *trans, ...@@ -885,8 +932,16 @@ int bch2_trans_commit(struct btree_trans *trans,
return ret; return ret;
err: err:
ret = bch2_trans_commit_error(trans, i, ret); ret = bch2_trans_commit_error(trans, i, ret);
if (!ret)
/* can't loop if it was passed in and we changed it: */
if (unlikely(trans->flags & BTREE_INSERT_NO_CLEAR_REPLICAS) && !ret)
ret = -EINTR;
if (!ret) {
/* free memory used by triggers, they'll be reexecuted: */
trans->mem_top = orig_mem_top;
goto retry; goto retry;
}
goto out; goto out;
} }
...@@ -969,6 +1024,7 @@ int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id, ...@@ -969,6 +1024,7 @@ int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id,
int ret = 0; int ret = 0;
bch2_trans_init(&trans, c); bch2_trans_init(&trans, c);
bch2_trans_preload_iters(&trans);
iter = bch2_trans_get_iter(&trans, id, start, BTREE_ITER_INTENT); iter = bch2_trans_get_iter(&trans, id, start, BTREE_ITER_INTENT);
...@@ -1014,5 +1070,6 @@ int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id, ...@@ -1014,5 +1070,6 @@ int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id,
} }
bch2_trans_exit(&trans); bch2_trans_exit(&trans);
BUG_ON(ret == -EINTR);
return ret; return ret;
} }
This diff is collapsed.
...@@ -100,7 +100,7 @@ static inline struct bucket_mark ptr_bucket_mark(struct bch_dev *ca, ...@@ -100,7 +100,7 @@ static inline struct bucket_mark ptr_bucket_mark(struct bch_dev *ca,
struct bucket_mark m; struct bucket_mark m;
rcu_read_lock(); rcu_read_lock();
m = READ_ONCE(bucket(ca, PTR_BUCKET_NR(ca, ptr))->mark); m = READ_ONCE(PTR_BUCKET(ca, ptr, 0)->mark);
rcu_read_unlock(); rcu_read_unlock();
return m; return m;
...@@ -266,6 +266,15 @@ int bch2_mark_overwrite(struct btree_trans *, struct btree_iter *, ...@@ -266,6 +266,15 @@ int bch2_mark_overwrite(struct btree_trans *, struct btree_iter *,
struct bch_fs_usage *, unsigned); struct bch_fs_usage *, unsigned);
int bch2_mark_update(struct btree_trans *, struct btree_insert_entry *, int bch2_mark_update(struct btree_trans *, struct btree_insert_entry *,
struct bch_fs_usage *, unsigned); struct bch_fs_usage *, unsigned);
void bch2_replicas_delta_list_apply(struct bch_fs *,
struct bch_fs_usage *,
struct replicas_delta_list *);
int bch2_trans_mark_key(struct btree_trans *, struct bkey_s_c,
bool, s64, struct replicas_delta_list *);
int bch2_trans_mark_update(struct btree_trans *,
struct btree_insert_entry *,
struct replicas_delta_list *);
void bch2_trans_fs_usage_apply(struct btree_trans *, struct bch_fs_usage_online *); void bch2_trans_fs_usage_apply(struct btree_trans *, struct bch_fs_usage_online *);
/* disk reservations: */ /* disk reservations: */
......
...@@ -93,6 +93,19 @@ struct bch_fs_usage_short { ...@@ -93,6 +93,19 @@ struct bch_fs_usage_short {
u64 nr_inodes; u64 nr_inodes;
}; };
/*
 * One entry of a replicas_delta_list: a signed 64-bit delta paired with a
 * replicas entry. The struct is packed, so there is no padding between the
 * two members.
 *
 * NOTE(review): presumably @delta is the usage change to apply to the
 * counter identified by @r - confirm against
 * bch2_replicas_delta_list_apply().
 */
struct replicas_delta {
s64 delta;
struct bch_replicas_entry r;
} __packed;
/*
 * Accumulated filesystem usage changes, embedded in struct btree_trans as
 * fs_usage_deltas.
 *
 * do_btree_insert_at() resets the list (zeroes @fs_usage and sets @top back
 * to @d) unless BTREE_INSERT_NO_CLEAR_REPLICAS is set.
 *
 * Layout note: @d is a zero-length array and is immediately followed by
 * @pad. NOTE(review): @pad presumably provides the 256 bytes of backing
 * storage for entries appended at @d, with @top marking the end of the used
 * region - confirm against the code that appends entries. Do not reorder
 * @d and @pad, and do not convert @d to a C99 flexible array member (it is
 * not the last member).
 */
struct replicas_delta_list {
struct bch_fs_usage fs_usage;
struct replicas_delta *top;
struct replicas_delta d[0];
u8 pad[256];
};
/* /*
* A reservation for space on disk: * A reservation for space on disk:
*/ */
......
...@@ -539,14 +539,17 @@ static int ec_stripe_mem_alloc(struct bch_fs *c, ...@@ -539,14 +539,17 @@ static int ec_stripe_mem_alloc(struct bch_fs *c,
struct btree_iter *iter) struct btree_iter *iter)
{ {
size_t idx = iter->pos.offset; size_t idx = iter->pos.offset;
int ret = 0;
if (!__ec_stripe_mem_alloc(c, idx, GFP_NOWAIT|__GFP_NOWARN)) if (!__ec_stripe_mem_alloc(c, idx, GFP_NOWAIT|__GFP_NOWARN))
return 0; return ret;
bch2_btree_trans_unlock(iter->trans); bch2_btree_trans_unlock(iter->trans);
ret = -EINTR;
if (!__ec_stripe_mem_alloc(c, idx, GFP_KERNEL)) if (!__ec_stripe_mem_alloc(c, idx, GFP_KERNEL))
return -EINTR; return ret;
return -ENOMEM; return -ENOMEM;
} }
...@@ -692,23 +695,22 @@ static int ec_stripe_bkey_insert(struct bch_fs *c, ...@@ -692,23 +695,22 @@ static int ec_stripe_bkey_insert(struct bch_fs *c,
if (!ret) if (!ret)
ret = -ENOSPC; ret = -ENOSPC;
goto out; goto err;
found_slot: found_slot:
ret = ec_stripe_mem_alloc(c, iter); ret = ec_stripe_mem_alloc(c, iter);
if (ret == -EINTR)
goto retry;
if (ret) if (ret)
return ret; goto err;
stripe->k.p = iter->pos; stripe->k.p = iter->pos;
bch2_trans_update(&trans, BTREE_INSERT_ENTRY(iter, &stripe->k_i)); bch2_trans_update(&trans, BTREE_INSERT_ENTRY(iter, &stripe->k_i));
ret = bch2_trans_commit(&trans, NULL, NULL, ret = bch2_trans_commit(&trans, NULL, NULL,
BTREE_INSERT_NOFAIL| BTREE_INSERT_ATOMIC|
BTREE_INSERT_USE_RESERVE); BTREE_INSERT_NOFAIL);
out: err:
if (ret == -EINTR)
goto retry;
bch2_trans_exit(&trans); bch2_trans_exit(&trans);
return ret; return ret;
...@@ -745,6 +747,7 @@ static int ec_stripe_update_ptrs(struct bch_fs *c, ...@@ -745,6 +747,7 @@ static int ec_stripe_update_ptrs(struct bch_fs *c,
int ret = 0, dev, idx; int ret = 0, dev, idx;
bch2_trans_init(&trans, c); bch2_trans_init(&trans, c);
bch2_trans_preload_iters(&trans);
iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
bkey_start_pos(pos), bkey_start_pos(pos),
......
...@@ -903,15 +903,54 @@ static void extent_bset_insert(struct bch_fs *c, struct btree_iter *iter, ...@@ -903,15 +903,54 @@ static void extent_bset_insert(struct bch_fs *c, struct btree_iter *iter,
bch2_btree_iter_verify(iter, l->b); bch2_btree_iter_verify(iter, l->b);
} }
/*
 * Count the entries of key @k whose type is BCH_EXTENT_ENTRY_ptr or
 * BCH_EXTENT_ENTRY_stripe_ptr. Entries of any other type are not counted.
 */
static unsigned bch2_bkey_nr_alloc_ptrs(struct bkey_s_c k)
{
	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
	const union bch_extent_entry *entry;
	unsigned nr = 0;

	bkey_extent_entry_for_each(ptrs, entry) {
		switch (__extent_entry_type(entry)) {
		case BCH_EXTENT_ENTRY_ptr:
		case BCH_EXTENT_ENTRY_stripe_ptr:
			nr += 1;
			break;
		default:
			/* every other entry type is ignored */
			break;
		}
	}

	return nr;
}
static inline struct bpos static inline struct bpos
bch2_extent_atomic_end(struct bkey_i *k, struct btree_iter *iter) bch2_extent_atomic_end(struct bkey_i *insert, struct btree_iter *iter)
{ {
struct btree *b = iter->l[0].b; struct btree *b = iter->l[0].b;
struct btree_node_iter node_iter = iter->l[0].iter;
struct bkey_packed *_k;
unsigned nr_alloc_ptrs =
bch2_bkey_nr_alloc_ptrs(bkey_i_to_s_c(insert));
BUG_ON(iter->uptodate > BTREE_ITER_NEED_PEEK); BUG_ON(iter->uptodate > BTREE_ITER_NEED_PEEK);
BUG_ON(bkey_cmp(bkey_start_pos(&k->k), b->data->min_key) < 0); BUG_ON(bkey_cmp(bkey_start_pos(&insert->k), b->data->min_key) < 0);
while ((_k = bch2_btree_node_iter_peek_filter(&node_iter, b,
KEY_TYPE_discard))) {
struct bkey unpacked;
struct bkey_s_c k = bkey_disassemble(b, _k, &unpacked);
if (bkey_cmp(insert->k.p, bkey_start_pos(k.k)) <= 0)
break;
nr_alloc_ptrs += bch2_bkey_nr_alloc_ptrs(k);
if (nr_alloc_ptrs > 20) {
BUG_ON(bkey_cmp(k.k->p, bkey_start_pos(&insert->k)) <= 0);
return bpos_min(insert->k.p, k.k->p);
}
bch2_btree_node_iter_advance(&node_iter, b);
}
return bpos_min(k->k.p, b->key.k.p); return bpos_min(insert->k.p, b->key.k.p);
} }
void bch2_extent_trim_atomic(struct bkey_i *k, struct btree_iter *iter) void bch2_extent_trim_atomic(struct bkey_i *k, struct btree_iter *iter)
......
...@@ -43,6 +43,7 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags) ...@@ -43,6 +43,7 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
int ret = 0; int ret = 0;
bch2_trans_init(&trans, c); bch2_trans_init(&trans, c);
bch2_trans_preload_iters(&trans);
iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
POS_MIN, BTREE_ITER_PREFETCH); POS_MIN, BTREE_ITER_PREFETCH);
...@@ -96,6 +97,8 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags) ...@@ -96,6 +97,8 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
break; break;
} }
BUG_ON(ret == -EINTR);
bch2_trans_exit(&trans); bch2_trans_exit(&trans);
bch2_replicas_gc_end(c, ret); bch2_replicas_gc_end(c, ret);
......
...@@ -62,6 +62,7 @@ static int bch2_migrate_index_update(struct bch_write_op *op) ...@@ -62,6 +62,7 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
int ret = 0; int ret = 0;
bch2_trans_init(&trans, c); bch2_trans_init(&trans, c);
bch2_trans_preload_iters(&trans);
iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
bkey_start_pos(&bch2_keylist_front(keys)->k), bkey_start_pos(&bch2_keylist_front(keys)->k),
...@@ -184,6 +185,7 @@ static int bch2_migrate_index_update(struct bch_write_op *op) ...@@ -184,6 +185,7 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
} }
out: out:
bch2_trans_exit(&trans); bch2_trans_exit(&trans);
BUG_ON(ret == -EINTR);
return ret; return ret;
} }
......
...@@ -212,11 +212,6 @@ static int bch2_extent_replay_key(struct bch_fs *c, struct bkey_i *k) ...@@ -212,11 +212,6 @@ static int bch2_extent_replay_key(struct bch_fs *c, struct bkey_i *k)
bch2_disk_reservation_init(c, 0); bch2_disk_reservation_init(c, 0);
struct bkey_i *split; struct bkey_i *split;
bool split_compressed = false; bool split_compressed = false;
unsigned flags = BTREE_INSERT_ATOMIC|
BTREE_INSERT_NOFAIL|
BTREE_INSERT_LAZY_RW|
BTREE_INSERT_JOURNAL_REPLAY|
BTREE_INSERT_NOMARK;
int ret; int ret;
bch2_trans_init(&trans, c); bch2_trans_init(&trans, c);
...@@ -252,9 +247,6 @@ static int bch2_extent_replay_key(struct bch_fs *c, struct bkey_i *k) ...@@ -252,9 +247,6 @@ static int bch2_extent_replay_key(struct bch_fs *c, struct bkey_i *k)
BCH_DISK_RESERVATION_NOFAIL); BCH_DISK_RESERVATION_NOFAIL);
BUG_ON(ret); BUG_ON(ret);
flags &= ~BTREE_INSERT_JOURNAL_REPLAY;
flags &= ~BTREE_INSERT_NOMARK;
flags |= BTREE_INSERT_NOMARK_OVERWRITES;
split_compressed = true; split_compressed = true;
} }
...@@ -266,24 +258,31 @@ static int bch2_extent_replay_key(struct bch_fs *c, struct bkey_i *k) ...@@ -266,24 +258,31 @@ static int bch2_extent_replay_key(struct bch_fs *c, struct bkey_i *k)
bch2_btree_iter_set_pos(iter, split->k.p); bch2_btree_iter_set_pos(iter, split->k.p);
} while (bkey_cmp(iter->pos, k->k.p) < 0); } while (bkey_cmp(iter->pos, k->k.p) < 0);
ret = bch2_trans_commit(&trans, &disk_res, NULL, flags);
if (ret)
goto err;
if (split_compressed) { if (split_compressed) {
/* memset(&trans.fs_usage_deltas.fs_usage, 0,
* This isn't strictly correct - we should only be relying on sizeof(trans.fs_usage_deltas.fs_usage));
* the btree node lock for synchronization with gc when we've trans.fs_usage_deltas.top = trans.fs_usage_deltas.d;
* got a write lock held.
* ret = bch2_trans_mark_key(&trans, bkey_i_to_s_c(k), false,
* but - there are other correctness issues if btree gc were to -((s64) k->k.size),
* run before journal replay finishes &trans.fs_usage_deltas) ?:
*/ bch2_trans_commit(&trans, &disk_res, NULL,
BUG_ON(c->gc_pos.phase); BTREE_INSERT_ATOMIC|
BTREE_INSERT_NOFAIL|
bch2_mark_key(c, bkey_i_to_s_c(k), false, -((s64) k->k.size), BTREE_INSERT_LAZY_RW|
NULL, 0, 0); BTREE_INSERT_NOMARK_OVERWRITES|
BTREE_INSERT_NO_CLEAR_REPLICAS);
} else {
ret = bch2_trans_commit(&trans, &disk_res, NULL,
BTREE_INSERT_ATOMIC|
BTREE_INSERT_NOFAIL|
BTREE_INSERT_LAZY_RW|
BTREE_INSERT_JOURNAL_REPLAY|
BTREE_INSERT_NOMARK);
} }
if (ret)
goto err;
err: err:
if (ret == -EINTR) if (ret == -EINTR)
goto retry; goto retry;
...@@ -527,7 +526,7 @@ static int verify_superblock_clean(struct bch_fs *c, ...@@ -527,7 +526,7 @@ static int verify_superblock_clean(struct bch_fs *c,
struct bch_sb_field_clean *clean = *cleanp; struct bch_sb_field_clean *clean = *cleanp;
int ret = 0; int ret = 0;
if (!clean || !j) if (!c->sb.clean || !j)
return 0; return 0;
if (mustfix_fsck_err_on(j->seq != clean->journal_seq, c, if (mustfix_fsck_err_on(j->seq != clean->journal_seq, c,
...@@ -653,6 +652,7 @@ int bch2_fs_recovery(struct bch_fs *c) ...@@ -653,6 +652,7 @@ int bch2_fs_recovery(struct bch_fs *c)
u64 journal_seq; u64 journal_seq;
LIST_HEAD(journal_entries); LIST_HEAD(journal_entries);
struct journal_keys journal_keys = { NULL }; struct journal_keys journal_keys = { NULL };
bool wrote = false, write_sb = false;
int ret; int ret;
if (c->sb.clean) if (c->sb.clean)
...@@ -677,8 +677,12 @@ int bch2_fs_recovery(struct bch_fs *c) ...@@ -677,8 +677,12 @@ int bch2_fs_recovery(struct bch_fs *c)
if (ret) if (ret)
goto err; goto err;
fsck_err_on(c->sb.clean && !journal_empty(&journal_entries), c, if (mustfix_fsck_err_on(c->sb.clean && !journal_empty(&journal_entries), c,
"filesystem marked clean but journal not empty"); "filesystem marked clean but journal not empty")) {
c->sb.compat &= ~(1ULL << BCH_COMPAT_FEAT_ALLOC_INFO);
SET_BCH_SB_CLEAN(c->disk_sb.sb, false);
c->sb.clean = false;
}
if (!c->sb.clean && list_empty(&journal_entries)) { if (!c->sb.clean && list_empty(&journal_entries)) {
bch_err(c, "no journal entries found"); bch_err(c, "no journal entries found");
...@@ -736,12 +740,15 @@ int bch2_fs_recovery(struct bch_fs *c) ...@@ -736,12 +740,15 @@ int bch2_fs_recovery(struct bch_fs *c)
if (ret) if (ret)
goto err; goto err;
bch_verbose(c, "starting alloc read");
err = "error reading allocation information"; err = "error reading allocation information";
ret = bch2_alloc_read(c, &journal_keys); ret = bch2_alloc_read(c, &journal_keys);
if (ret) if (ret)
goto err; goto err;
bch_verbose(c, "alloc read done");
bch_verbose(c, "starting stripes_read"); bch_verbose(c, "starting stripes_read");
err = "error reading stripes";
ret = bch2_stripes_read(c, &journal_keys); ret = bch2_stripes_read(c, &journal_keys);
if (ret) if (ret)
goto err; goto err;
...@@ -749,11 +756,26 @@ int bch2_fs_recovery(struct bch_fs *c) ...@@ -749,11 +756,26 @@ int bch2_fs_recovery(struct bch_fs *c)
set_bit(BCH_FS_ALLOC_READ_DONE, &c->flags); set_bit(BCH_FS_ALLOC_READ_DONE, &c->flags);
if ((c->sb.compat & (1ULL << BCH_COMPAT_FEAT_ALLOC_INFO)) &&
!(c->sb.compat & (1ULL << BCH_COMPAT_FEAT_ALLOC_METADATA))) {
/*
* interior btree node updates aren't consistent with the
* journal; after an unclean shutdown we have to walk all
* pointers to metadata:
*/
bch_verbose(c, "starting metadata mark and sweep:");
err = "error in mark and sweep";
ret = bch2_gc(c, NULL, true, true);
if (ret)
goto err;
bch_verbose(c, "mark and sweep done");
}
if (c->opts.fsck || if (c->opts.fsck ||
!(c->sb.compat & (1ULL << BCH_COMPAT_FEAT_ALLOC_INFO)) || !(c->sb.compat & (1ULL << BCH_COMPAT_FEAT_ALLOC_INFO)) ||
test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags)) { test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags)) {
bch_verbose(c, "starting mark and sweep:"); bch_verbose(c, "starting mark and sweep:");
err = "error in recovery"; err = "error in mark and sweep";
ret = bch2_gc(c, &journal_keys, true, false); ret = bch2_gc(c, &journal_keys, true, false);
if (ret) if (ret)
goto err; goto err;
...@@ -780,6 +802,16 @@ int bch2_fs_recovery(struct bch_fs *c) ...@@ -780,6 +802,16 @@ int bch2_fs_recovery(struct bch_fs *c)
goto err; goto err;
bch_verbose(c, "journal replay done"); bch_verbose(c, "journal replay done");
bch_verbose(c, "writing allocation info:");
err = "error writing out alloc info";
ret = bch2_stripes_write(c, BTREE_INSERT_LAZY_RW, &wrote) ?:
bch2_alloc_write(c, BTREE_INSERT_LAZY_RW, &wrote);
if (ret) {
bch_err(c, "error writing alloc info");
goto err;
}
bch_verbose(c, "alloc write done");
if (c->opts.norecovery) if (c->opts.norecovery)
goto out; goto out;
...@@ -802,13 +834,23 @@ int bch2_fs_recovery(struct bch_fs *c) ...@@ -802,13 +834,23 @@ int bch2_fs_recovery(struct bch_fs *c)
c->disk_sb.sb->version_min = c->disk_sb.sb->version_min =
le16_to_cpu(bcachefs_metadata_version_min); le16_to_cpu(bcachefs_metadata_version_min);
c->disk_sb.sb->version = le16_to_cpu(bcachefs_metadata_version_current); c->disk_sb.sb->version = le16_to_cpu(bcachefs_metadata_version_current);
write_sb = true;
}
if (!test_bit(BCH_FS_ERROR, &c->flags)) {
c->disk_sb.sb->compat[0] |= 1ULL << BCH_COMPAT_FEAT_ALLOC_INFO;
write_sb = true;
} }
if (c->opts.fsck && if (c->opts.fsck &&
!test_bit(BCH_FS_ERROR, &c->flags)) { !test_bit(BCH_FS_ERROR, &c->flags)) {
c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_ATOMIC_NLINK; c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_ATOMIC_NLINK;
SET_BCH_SB_HAS_ERRORS(c->disk_sb.sb, 0); SET_BCH_SB_HAS_ERRORS(c->disk_sb.sb, 0);
write_sb = true;
} }
if (write_sb)
bch2_write_super(c);
mutex_unlock(&c->sb_lock); mutex_unlock(&c->sb_lock);
if (c->journal_seq_blacklist_table && if (c->journal_seq_blacklist_table &&
...@@ -821,7 +863,7 @@ int bch2_fs_recovery(struct bch_fs *c) ...@@ -821,7 +863,7 @@ int bch2_fs_recovery(struct bch_fs *c)
return ret; return ret;
err: err:
fsck_err: fsck_err:
pr_err("Error in recovery: %s (%i)", err, ret); bch_err(c, "Error in recovery: %s (%i)", err, ret);
goto out; goto out;
} }
......
...@@ -102,8 +102,8 @@ static void stripe_to_replicas(struct bkey_s_c k, ...@@ -102,8 +102,8 @@ static void stripe_to_replicas(struct bkey_s_c k,
r->devs[r->nr_devs++] = ptr->dev; r->devs[r->nr_devs++] = ptr->dev;
} }
static void bkey_to_replicas(struct bch_replicas_entry *e, void bch2_bkey_to_replicas(struct bch_replicas_entry *e,
struct bkey_s_c k) struct bkey_s_c k)
{ {
e->nr_devs = 0; e->nr_devs = 0;
...@@ -439,7 +439,7 @@ bool bch2_bkey_replicas_marked_locked(struct bch_fs *c, ...@@ -439,7 +439,7 @@ bool bch2_bkey_replicas_marked_locked(struct bch_fs *c,
return false; return false;
} }
bkey_to_replicas(&search.e, k); bch2_bkey_to_replicas(&search.e, k);
return bch2_replicas_marked_locked(c, &search.e, check_gc_replicas); return bch2_replicas_marked_locked(c, &search.e, check_gc_replicas);
} }
...@@ -472,7 +472,7 @@ int bch2_mark_bkey_replicas(struct bch_fs *c, struct bkey_s_c k) ...@@ -472,7 +472,7 @@ int bch2_mark_bkey_replicas(struct bch_fs *c, struct bkey_s_c k)
return ret; return ret;
} }
bkey_to_replicas(&search.e, k); bch2_bkey_to_replicas(&search.e, k);
return bch2_mark_replicas(c, &search.e); return bch2_mark_replicas(c, &search.e);
} }
......
...@@ -28,6 +28,7 @@ int bch2_mark_replicas(struct bch_fs *, ...@@ -28,6 +28,7 @@ int bch2_mark_replicas(struct bch_fs *,
bool bch2_bkey_replicas_marked_locked(struct bch_fs *, bool bch2_bkey_replicas_marked_locked(struct bch_fs *,
struct bkey_s_c, bool); struct bkey_s_c, bool);
void bch2_bkey_to_replicas(struct bch_replicas_entry *, struct bkey_s_c);
bool bch2_bkey_replicas_marked(struct bch_fs *, bool bch2_bkey_replicas_marked(struct bch_fs *,
struct bkey_s_c, bool); struct bkey_s_c, bool);
int bch2_mark_bkey_replicas(struct bch_fs *, struct bkey_s_c); int bch2_mark_bkey_replicas(struct bch_fs *, struct bkey_s_c);
......
...@@ -946,7 +946,7 @@ int bch2_fs_mark_dirty(struct bch_fs *c) ...@@ -946,7 +946,7 @@ int bch2_fs_mark_dirty(struct bch_fs *c)
mutex_lock(&c->sb_lock); mutex_lock(&c->sb_lock);
SET_BCH_SB_CLEAN(c->disk_sb.sb, false); SET_BCH_SB_CLEAN(c->disk_sb.sb, false);
c->disk_sb.sb->compat[0] &= ~(1ULL << BCH_COMPAT_FEAT_ALLOC_INFO); c->disk_sb.sb->compat[0] &= ~(1ULL << BCH_COMPAT_FEAT_ALLOC_METADATA);
ret = bch2_write_super(c); ret = bch2_write_super(c);
mutex_unlock(&c->sb_lock); mutex_unlock(&c->sb_lock);
...@@ -1063,6 +1063,7 @@ void bch2_fs_mark_clean(struct bch_fs *c) ...@@ -1063,6 +1063,7 @@ void bch2_fs_mark_clean(struct bch_fs *c)
SET_BCH_SB_CLEAN(c->disk_sb.sb, true); SET_BCH_SB_CLEAN(c->disk_sb.sb, true);
c->disk_sb.sb->compat[0] |= 1ULL << BCH_COMPAT_FEAT_ALLOC_INFO; c->disk_sb.sb->compat[0] |= 1ULL << BCH_COMPAT_FEAT_ALLOC_INFO;
c->disk_sb.sb->compat[0] |= 1ULL << BCH_COMPAT_FEAT_ALLOC_METADATA;
u64s = sizeof(*sb_clean) / sizeof(u64) + c->journal.entry_u64s_reserved; u64s = sizeof(*sb_clean) / sizeof(u64) + c->journal.entry_u64s_reserved;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment