Commit 6671a708 authored by Kent Overstreet, committed by Kent Overstreet

bcachefs: Refactor bch2_alloc_write()

Major simplification: this gets rid of the need to mark buckets as dirty;
instead, we write out a bucket if its in-memory mark differs from what's
in the btree.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent 67163cde
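The heart of the change is a write-if-changed pattern: unpack the alloc key as it exists in the btree, build the key that the in-memory bucket mark would produce, and commit an update only when the two differ. A minimal standalone sketch of that idea, using stand-in types rather than the kernel's bkey_alloc_unpacked machinery, could look like this:

/*
 * Illustrative sketch only, not the kernel code: stand-in types showing the
 * write-if-changed idea from this patch.  The real code compares
 * struct bkey_alloc_unpacked values via bkey_alloc_unpacked_cmp().
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct alloc_state {			/* stand-in for bkey_alloc_unpacked */
	uint8_t		gen;
	uint16_t	dirty_sectors;
	uint16_t	cached_sectors;
};

/* returns true if the two states differ */
static bool alloc_state_cmp(struct alloc_state l, struct alloc_state r)
{
	return l.gen != r.gen ||
	       l.dirty_sectors != r.dirty_sectors ||
	       l.cached_sectors != r.cached_sectors;
}

/* only issue a btree update when the in-memory state disagrees with the btree */
static bool maybe_write_bucket(struct alloc_state btree, struct alloc_state mem)
{
	if (!alloc_state_cmp(btree, mem))
		return false;		/* nothing changed, nothing to write */
	/* ...pack "mem" into an alloc key and commit it to the btree... */
	return true;
}

int main(void)
{
	struct alloc_state btree = { .gen = 3, .dirty_sectors = 128 };
	struct alloc_state mem   = { .gen = 4, .dirty_sectors = 0   };

	printf("write needed: %d\n", maybe_write_bucket(btree, mem));
	return 0;
}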
@@ -258,46 +258,68 @@ int bch2_alloc_read(struct bch_fs *c, struct journal_keys *journal_keys)
 	return 0;
 }

-int bch2_alloc_replay_key(struct bch_fs *c, struct bkey_i *k)
+enum alloc_write_ret {
+	ALLOC_WROTE,
+	ALLOC_NOWROTE,
+	ALLOC_END,
+};
+
+static int bch2_alloc_write_key(struct btree_trans *trans,
+				struct btree_iter *iter,
+				unsigned flags)
 {
-	struct btree_trans trans;
-	struct btree_iter *iter;
+	struct bch_fs *c = trans->c;
+	struct bkey_s_c k;
 	struct bch_dev *ca;
+	struct bucket_array *ba;
+	struct bucket *g;
+	struct bucket_mark m;
+	struct bkey_alloc_unpacked old_u, new_u;
+	__BKEY_PADDED(k, 8) alloc_key; /* hack: */
+	struct bkey_i_alloc *a;
 	int ret;
+retry:
+	k = bch2_btree_iter_peek_slot(iter);
+	ret = bkey_err(k);
+	if (ret)
+		goto err;
+
+	old_u = bch2_alloc_unpack(k);

-	if (k->k.p.inode >= c->sb.nr_devices ||
-	    !c->devs[k->k.p.inode])
-		return 0;
+	if (iter->pos.inode >= c->sb.nr_devices ||
+	    !c->devs[iter->pos.inode])
+		return ALLOC_END;

-	ca = bch_dev_bkey_exists(c, k->k.p.inode);
-
-	if (k->k.p.offset >= ca->mi.nbuckets)
-		return 0;
+	percpu_down_read(&c->mark_lock);
+	ca	= bch_dev_bkey_exists(c, iter->pos.inode);
+	ba	= bucket_array(ca);

-	bch2_trans_init(&trans, c, 0, 0);
-
-	iter = bch2_trans_get_iter(&trans, BTREE_ID_ALLOC, k->k.p,
-				   BTREE_ITER_INTENT);
+	if (iter->pos.offset >= ba->nbuckets) {
+		percpu_up_read(&c->mark_lock);
+		return ALLOC_END;
+	}

-	ret = bch2_btree_iter_traverse(iter);
-	if (ret)
-		goto err;
+	g	= &ba->b[iter->pos.offset];
+	m	= READ_ONCE(g->mark);
+	new_u	= alloc_mem_to_key(g, m);
+	percpu_up_read(&c->mark_lock);

-	/* check buckets_written with btree node locked: */
-	if (test_bit(k->k.p.offset, ca->buckets_written)) {
-		ret = 0;
-		goto err;
-	}
+	if (!bkey_alloc_unpacked_cmp(old_u, new_u))
+		return ALLOC_NOWROTE;

-	bch2_trans_update(&trans, BTREE_INSERT_ENTRY(iter, k));
+	a = bkey_alloc_init(&alloc_key.k);
+	a->k.p = iter->pos;
+	bch2_alloc_pack(a, new_u);

-	ret = bch2_trans_commit(&trans, NULL, NULL,
+	bch2_trans_update(trans, BTREE_INSERT_ENTRY(iter, &a->k_i));
+	ret = bch2_trans_commit(trans, NULL, NULL,
+				BTREE_INSERT_ATOMIC|
 				BTREE_INSERT_NOFAIL|
-				BTREE_INSERT_LAZY_RW|
-				BTREE_INSERT_JOURNAL_REPLAY|
-				BTREE_INSERT_NOMARK);
+				BTREE_INSERT_NOMARK|
+				flags);
 err:
-	bch2_trans_exit(&trans);
-
+	if (ret == -EINTR)
+		goto retry;
 	return ret;
 }
@@ -305,16 +327,8 @@ int bch2_alloc_write(struct bch_fs *c, unsigned flags, bool *wrote)
 {
 	struct btree_trans trans;
 	struct btree_iter *iter;
-	struct bucket_array *buckets;
 	struct bch_dev *ca;
-	struct bucket *g;
-	struct bucket_mark m, new;
-	struct bkey_alloc_unpacked old_u, new_u;
-	__BKEY_PADDED(k, 8) alloc_key; /* hack: */
-	struct bkey_i_alloc *a;
-	struct bkey_s_c k;
 	unsigned i;
-	size_t b;
 	int ret = 0;

 	BUG_ON(BKEY_ALLOC_VAL_U64s_MAX > 8);
@@ -325,81 +339,24 @@ int bch2_alloc_write(struct bch_fs *c, unsigned flags, bool *wrote)
 				   BTREE_ITER_SLOTS|BTREE_ITER_INTENT);

 	for_each_rw_member(ca, c, i) {
-		down_read(&ca->bucket_lock);
-restart:
-		buckets = bucket_array(ca);
-
-		for (b = buckets->first_bucket;
-		     b < buckets->nbuckets;
-		     b++) {
-			if (!buckets->b[b].mark.dirty)
-				continue;
-
-			bch2_btree_iter_set_pos(iter, POS(i, b));
-
-			k = bch2_btree_iter_peek_slot(iter);
-			ret = bkey_err(k);
-			if (ret)
-				goto err;
-
-			old_u = bch2_alloc_unpack(k);
-
-			percpu_down_read(&c->mark_lock);
-			g	= bucket(ca, b);
-			m	= READ_ONCE(g->mark);
-			new_u	= alloc_mem_to_key(g, m);
-			percpu_up_read(&c->mark_lock);
-
-			if (!m.dirty)
-				continue;
-
-			if ((flags & BTREE_INSERT_LAZY_RW) &&
-			    percpu_ref_is_zero(&c->writes)) {
-				up_read(&ca->bucket_lock);
-				bch2_trans_unlock(&trans);
-
-				ret = bch2_fs_read_write_early(c);
-				down_read(&ca->bucket_lock);
-				if (ret)
-					goto err;
-				goto restart;
-			}
-
-			a = bkey_alloc_init(&alloc_key.k);
-			a->k.p = iter->pos;
-			bch2_alloc_pack(a, new_u);
-
-			bch2_trans_update(&trans, BTREE_INSERT_ENTRY(iter, &a->k_i));
-			ret = bch2_trans_commit(&trans, NULL, NULL,
-						BTREE_INSERT_NOFAIL|
-						BTREE_INSERT_NOMARK|
-						flags);
-err:
-			if (ret && !test_bit(BCH_FS_EMERGENCY_RO, &c->flags)) {
-				bch_err(c, "error %i writing alloc info", ret);
-				printk(KERN_CONT "dev %llu bucket %llu\n",
-				       iter->pos.inode, iter->pos.offset);
-				printk(KERN_CONT "gen %u -> %u\n", old_u.gen, new_u.gen);
-#define x(_name, _bits)		printk(KERN_CONT #_name " %u -> %u\n", old_u._name, new_u._name);
-				BCH_ALLOC_FIELDS()
-#undef x
-			}
-			if (ret)
-				break;
-
-			new = m;
-			new.dirty = false;
-			atomic64_cmpxchg(&g->_mark.v, m.v.counter, new.v.counter);
-
-			if (ca->buckets_written)
-				set_bit(b, ca->buckets_written);
-
-			bch2_trans_cond_resched(&trans);
-			*wrote = true;
+		unsigned first_bucket;
+
+		percpu_down_read(&c->mark_lock);
+		first_bucket = bucket_array(ca)->first_bucket;
+		percpu_up_read(&c->mark_lock);
+
+		bch2_btree_iter_set_pos(iter, POS(i, first_bucket));
+
+		while (1) {
+			ret = bch2_alloc_write_key(&trans, iter, flags);
+			if (ret < 0 || ret == ALLOC_END)
+				break;
+			if (ret == ALLOC_WROTE)
+				*wrote = true;
+			bch2_btree_iter_next_slot(iter);
 		}
-		up_read(&ca->bucket_lock);

-		if (ret) {
+		if (ret < 0) {
 			percpu_ref_put(&ca->io_ref);
 			break;
 		}
@@ -407,7 +364,27 @@ int bch2_alloc_write(struct bch_fs *c, unsigned flags, bool *wrote)
 	bch2_trans_exit(&trans);

-	return ret;
+	return ret < 0 ? ret : 0;
+}
+
+int bch2_alloc_replay_key(struct bch_fs *c, struct bkey_i *k)
+{
+	struct btree_trans trans;
+	struct btree_iter *iter;
+	int ret;
+
+	bch2_trans_init(&trans, c, 0, 0);
+
+	iter = bch2_trans_get_iter(&trans, BTREE_ID_ALLOC, k->k.p,
+				   BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
+
+	ret = bch2_alloc_write_key(&trans, iter,
+				   BTREE_INSERT_NOFAIL|
+				   BTREE_INSERT_LAZY_RW|
+				   BTREE_INSERT_JOURNAL_REPLAY|
+				   BTREE_INSERT_NOMARK);
+	bch2_trans_exit(&trans);
+	return ret < 0 ? ret : 0;
 }

 /* Bucket IO clocks: */
@@ -954,10 +931,6 @@ static int bch2_invalidate_one_bucket2(struct btree_trans *trans,
 	if (!top->nr)
 		heap_pop(&ca->alloc_heap, e, bucket_alloc_cmp, NULL);

-	/* with btree still locked: */
-	if (ca->buckets_written)
-		set_bit(b, ca->buckets_written);
-
 	/*
 	 * Make sure we flush the last journal entry that updated this
 	 * bucket (i.e. deleting the last reference) before writing to
...
@@ -13,6 +13,17 @@ struct bkey_alloc_unpacked {
 #undef x
 };

+/* returns true if not equal */
+static inline bool bkey_alloc_unpacked_cmp(struct bkey_alloc_unpacked l,
+					   struct bkey_alloc_unpacked r)
+{
+	return l.gen != r.gen
+#define x(_name, _bits)	|| l._name != r._name
+	BCH_ALLOC_FIELDS()
+#undef x
+	;
+}
+
 struct bkey_alloc_unpacked bch2_alloc_unpack(struct bkey_s_c);
 void bch2_alloc_pack(struct bkey_i_alloc *,
 		     const struct bkey_alloc_unpacked);
...
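The bkey_alloc_unpacked_cmp() helper added above relies on an x-macro, so every alloc field automatically takes part in the comparison. A standalone sketch of that pattern, with made-up field names standing in for the real BCH_ALLOC_FIELDS() list:

/*
 * Standalone illustration of the x-macro pattern used by
 * bkey_alloc_unpacked_cmp(): each field listed by the FIELDS() macro
 * contributes one "|| l.field != r.field" term.  The field names here
 * are invented for the example, not the real alloc fields.
 */
#include <stdbool.h>
#include <stdint.h>

#define FIELDS()	\
	x(read_time)	\
	x(write_time)

struct unpacked {
	uint8_t		gen;
#define x(_name)	uint64_t _name;
	FIELDS()
#undef x
};

/* returns true if not equal, exactly like the helper in the patch */
static inline bool unpacked_cmp(struct unpacked l, struct unpacked r)
{
	return l.gen != r.gen
#define x(_name)	|| l._name != r._name
	FIELDS()
#undef x
	;
}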
@@ -412,7 +412,6 @@ struct bch_dev {
 	 */
 	struct bucket_array __rcu *buckets[2];
 	unsigned long		*buckets_nouse;
-	unsigned long		*buckets_written;
 	struct rw_semaphore	bucket_lock;

 	struct bch_dev_usage __percpu *usage[2];
...
@@ -148,7 +148,6 @@ static int bch2_gc_mark_key(struct bch_fs *c, struct bkey_s_c k,
 				"type %u gen %u",
 				k.k->type, ptr->gen)) {
 			g2->_mark.gen	= g->_mark.gen		= ptr->gen;
-			g2->_mark.dirty	= g->_mark.dirty	= true;
 			g2->gen_valid	= g->gen_valid		= true;
 		}
@@ -156,7 +155,6 @@ static int bch2_gc_mark_key(struct bch_fs *c, struct bkey_s_c k,
 				"%u ptr gen in the future: %u > %u",
 				k.k->type, ptr->gen, g->mark.gen)) {
 			g2->_mark.gen	= g->_mark.gen		= ptr->gen;
-			g2->_mark.dirty	= g->_mark.dirty	= true;
 			g2->gen_valid	= g->gen_valid		= true;
 			set_bit(BCH_FS_FIXED_GENS, &c->flags);
 		}
@@ -528,7 +526,6 @@ static int bch2_gc_done(struct bch_fs *c,
 			": got %u, should be %u", i, b,		\
 			dst->b[b].mark._f, src->b[b].mark._f);	\
 		dst->b[b]._mark._f = src->b[b].mark._f;		\
-		dst->b[b]._mark.dirty = true;			\
 	}
 #define copy_dev_field(_f, _msg, ...)					\
 	copy_field(_f, "dev %u has wrong " _msg, i, ##__VA_ARGS__)
@@ -580,10 +577,7 @@ static int bch2_gc_done(struct bch_fs *c,
 			copy_bucket_field(dirty_sectors);
 			copy_bucket_field(cached_sectors);

-			if (dst->b[b].oldest_gen != src->b[b].oldest_gen) {
-				dst->b[b].oldest_gen = src->b[b].oldest_gen;
-				dst->b[b]._mark.dirty = true;
-			}
+			dst->b[b].oldest_gen = src->b[b].oldest_gen;
 		}
 	};
...
@@ -634,7 +634,6 @@ static int __bch2_invalidate_bucket(struct bch_fs *c, struct bch_dev *ca,
 		BUG_ON(!is_available_bucket(new));

 		new.owned_by_allocator	= true;
-		new.dirty		= true;
 		new.data_type		= 0;
 		new.cached_sectors	= 0;
 		new.dirty_sectors	= 0;
@@ -774,7 +773,6 @@ static int __bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
 	       type != BCH_DATA_JOURNAL);

 	old = bucket_cmpxchg(g, new, ({
-		new.dirty	= true;
 		new.data_type	= type;
 		overflow = checked_add(new.dirty_sectors, sectors);
 	}));
@@ -849,7 +847,6 @@ static void bucket_set_stripe(struct bch_fs *c,
 		struct bucket_mark new, old;

 		old = bucket_data_cmpxchg(c, ca, fs_usage, g, new, ({
-			new.dirty			= true;
 			new.stripe			= enabled;
 			if (journal_seq) {
 				new.journal_seq_valid	= 1;
@@ -896,8 +893,6 @@ static bool bch2_mark_pointer(struct bch_fs *c,
 	do {
 		new.v.counter = old.v.counter = v;

-		new.dirty = true;
-
 		/*
 		 * Check this after reading bucket mark to guard against
 		 * the allocator invalidating a bucket after we've already
@@ -1882,7 +1877,6 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
 {
 	struct bucket_array *buckets = NULL, *old_buckets = NULL;
 	unsigned long *buckets_nouse = NULL;
-	unsigned long *buckets_written = NULL;
 	alloc_fifo	free[RESERVE_NR];
 	alloc_fifo	free_inc;
 	alloc_heap	alloc_heap;
@@ -1911,9 +1905,6 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
 	    !(buckets_nouse	= kvpmalloc(BITS_TO_LONGS(nbuckets) *
					    sizeof(unsigned long),
					    GFP_KERNEL|__GFP_ZERO)) ||
-	    !(buckets_written	= kvpmalloc(BITS_TO_LONGS(nbuckets) *
-					    sizeof(unsigned long),
-					    GFP_KERNEL|__GFP_ZERO)) ||
 	    !init_fifo(&free[RESERVE_BTREE], btree_reserve, GFP_KERNEL) ||
 	    !init_fifo(&free[RESERVE_MOVINGGC],
 		       copygc_reserve, GFP_KERNEL) ||
@@ -1945,16 +1936,12 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
 		memcpy(buckets_nouse,
 		       ca->buckets_nouse,
 		       BITS_TO_LONGS(n) * sizeof(unsigned long));
-		memcpy(buckets_written,
-		       ca->buckets_written,
-		       BITS_TO_LONGS(n) * sizeof(unsigned long));
 	}

 	rcu_assign_pointer(ca->buckets[0], buckets);
 	buckets = old_buckets;

 	swap(ca->buckets_nouse, buckets_nouse);
-	swap(ca->buckets_written, buckets_written);

 	if (resize)
 		percpu_up_write(&c->mark_lock);
@@ -1994,8 +1981,6 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
 		free_fifo(&free[i]);
 	kvpfree(buckets_nouse,
 		BITS_TO_LONGS(nbuckets) * sizeof(unsigned long));
-	kvpfree(buckets_written,
-		BITS_TO_LONGS(nbuckets) * sizeof(unsigned long));
 	if (buckets)
 		call_rcu(&old_buckets->rcu, buckets_free_rcu);
@@ -2011,8 +1996,6 @@ void bch2_dev_buckets_free(struct bch_dev *ca)
 	free_fifo(&ca->free_inc);
 	for (i = 0; i < RESERVE_NR; i++)
 		free_fifo(&ca->free[i]);
-	kvpfree(ca->buckets_written,
-		BITS_TO_LONGS(ca->mi.nbuckets) * sizeof(unsigned long));
 	kvpfree(ca->buckets_nouse,
 		BITS_TO_LONGS(ca->mi.nbuckets) * sizeof(unsigned long));
 	kvpfree(rcu_dereference_protected(ca->buckets[0], 1),
...
@@ -15,7 +15,6 @@ struct bucket_mark {
 		u8		gen;
 		u8		data_type:3,
 				owned_by_allocator:1,
-				dirty:1,
 				journal_seq_valid:1,
 				stripe:1;
 		u16		dirty_sectors;
...