Commit 460651ee authored by Kent Overstreet, committed by Kent Overstreet

bcachefs: Various improvements to bch2_alloc_write()

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent 932aa837
@@ -129,15 +129,21 @@ static inline void put_alloc_field(struct bkey_i_alloc *a, void **p,
         *p += bytes;
 }
 
-struct bkey_alloc_unpacked bch2_alloc_unpack(const struct bch_alloc *a)
+struct bkey_alloc_unpacked bch2_alloc_unpack(struct bkey_s_c k)
 {
-        struct bkey_alloc_unpacked ret = { .gen = a->gen };
+        struct bkey_alloc_unpacked ret = { .gen = 0 };
+
+        if (k.k->type == KEY_TYPE_alloc) {
+                const struct bch_alloc *a = bkey_s_c_to_alloc(k).v;
                 const void *d = a->data;
                 unsigned idx = 0;
 
+                ret.gen = a->gen;
+
 #define x(_name, _bits) ret._name = get_alloc_field(a, &d, idx++);
                 BCH_ALLOC_FIELDS()
 #undef x
+        }
 
         return ret;
 }
@@ -199,66 +205,18 @@ void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c,
                        get_alloc_field(a.v, &d, i));
 }
 
-static void __alloc_read_key(struct bucket *g, const struct bch_alloc *a)
-{
-        const void *d = a->data;
-        unsigned idx = 0, data_type, dirty_sectors, cached_sectors;
-        struct bucket_mark m;
-
-        g->io_time[READ]        = get_alloc_field(a, &d, idx++);
-        g->io_time[WRITE]       = get_alloc_field(a, &d, idx++);
-        data_type               = get_alloc_field(a, &d, idx++);
-        dirty_sectors           = get_alloc_field(a, &d, idx++);
-        cached_sectors          = get_alloc_field(a, &d, idx++);
-        g->oldest_gen           = get_alloc_field(a, &d, idx++);
-
-        bucket_cmpxchg(g, m, ({
-                m.gen                   = a->gen;
-                m.data_type             = data_type;
-                m.dirty_sectors         = dirty_sectors;
-                m.cached_sectors        = cached_sectors;
-        }));
-
-        g->gen_valid            = 1;
-}
-
-static void __alloc_write_key(struct bkey_i_alloc *a, struct bucket *g,
-                              struct bucket_mark m)
+static inline struct bkey_alloc_unpacked
+alloc_mem_to_key(struct bucket *g, struct bucket_mark m)
 {
-        unsigned idx = 0;
-        void *d = a->v.data;
-
-        a->v.fields     = 0;
-        a->v.gen        = m.gen;
-
-        d = a->v.data;
-        put_alloc_field(a, &d, idx++, g->io_time[READ]);
-        put_alloc_field(a, &d, idx++, g->io_time[WRITE]);
-        put_alloc_field(a, &d, idx++, m.data_type);
-        put_alloc_field(a, &d, idx++, m.dirty_sectors);
-        put_alloc_field(a, &d, idx++, m.cached_sectors);
-        put_alloc_field(a, &d, idx++, g->oldest_gen);
-
-        set_bkey_val_bytes(&a->k, (void *) d - (void *) &a->v);
-}
-
-static void bch2_alloc_read_key(struct bch_fs *c, struct bkey_s_c k)
-{
-        struct bch_dev *ca;
-        struct bkey_s_c_alloc a;
-
-        if (k.k->type != KEY_TYPE_alloc)
-                return;
-
-        a = bkey_s_c_to_alloc(k);
-        ca = bch_dev_bkey_exists(c, a.k->p.inode);
-
-        if (a.k->p.offset >= ca->mi.nbuckets)
-                return;
-
-        percpu_down_read(&c->mark_lock);
-        __alloc_read_key(bucket(ca, a.k->p.offset), a.v);
-        percpu_up_read(&c->mark_lock);
+        return (struct bkey_alloc_unpacked) {
+                .gen            = m.gen,
+                .oldest_gen     = g->oldest_gen,
+                .data_type      = m.data_type,
+                .dirty_sectors  = m.dirty_sectors,
+                .cached_sectors = m.cached_sectors,
+                .read_time      = g->io_time[READ],
+                .write_time     = g->io_time[WRITE],
+        };
 }
 
 int bch2_alloc_read(struct bch_fs *c, struct journal_keys *journal_keys)
@@ -274,7 +232,7 @@ int bch2_alloc_read(struct bch_fs *c, struct journal_keys *journal_keys)
         bch2_trans_init(&trans, c);
 
         for_each_btree_key(&trans, iter, BTREE_ID_ALLOC, POS_MIN, 0, k, ret)
-                bch2_alloc_read_key(c, k);
+                bch2_mark_key(c, k, true, 0, NULL, 0, 0);
 
         ret = bch2_trans_exit(&trans) ?: ret;
         if (ret) {
@@ -284,7 +242,8 @@ int bch2_alloc_read(struct bch_fs *c, struct journal_keys *journal_keys)
 
         for_each_journal_key(*journal_keys, j)
                 if (j->btree_id == BTREE_ID_ALLOC)
-                        bch2_alloc_read_key(c, bkey_i_to_s_c(j->k));
+                        bch2_mark_key(c, bkey_i_to_s_c(j->k),
+                                      true, 0, NULL, 0, 0);
 
         percpu_down_write(&c->mark_lock);
         bch2_dev_usage_from_buckets(c);
@@ -352,81 +311,32 @@ int bch2_alloc_replay_key(struct bch_fs *c, struct bkey_i *k)
         return ret;
 }
 
-static int __bch2_alloc_write_key(struct btree_trans *trans, struct bch_dev *ca,
-                                  size_t b, struct btree_iter *iter,
-                                  unsigned flags)
-{
-        struct bch_fs *c = trans->c;
-#if 0
-        __BKEY_PADDED(k, BKEY_ALLOC_VAL_U64s_MAX) alloc_key;
-#else
-        /* hack: */
-        __BKEY_PADDED(k, 8) alloc_key;
-#endif
-        struct bkey_i_alloc *a = bkey_alloc_init(&alloc_key.k);
-        struct bucket *g;
-        struct bucket_mark m, new;
-        int ret;
-
-        BUG_ON(BKEY_ALLOC_VAL_U64s_MAX > 8);
-
-        a->k.p = POS(ca->dev_idx, b);
-
-        bch2_btree_iter_set_pos(iter, a->k.p);
-
-        ret = bch2_btree_iter_traverse(iter);
-        if (ret)
-                return ret;
-
-        percpu_down_read(&c->mark_lock);
-        g = bucket(ca, b);
-        m = READ_ONCE(g->mark);
-
-        if (!m.dirty) {
-                percpu_up_read(&c->mark_lock);
-                return 0;
-        }
-
-        __alloc_write_key(a, g, m);
-        percpu_up_read(&c->mark_lock);
-
-        bch2_trans_update(trans, BTREE_INSERT_ENTRY(iter, &a->k_i));
-
-        ret = bch2_trans_commit(trans, NULL, NULL,
-                                BTREE_INSERT_NOFAIL|
-                                BTREE_INSERT_NOMARK|
-                                flags);
-        if (ret)
-                return ret;
-
-        new = m;
-        new.dirty = false;
-        atomic64_cmpxchg(&g->_mark.v, m.v.counter, new.v.counter);
-
-        if (ca->buckets_written)
-                set_bit(b, ca->buckets_written);
-
-        return 0;
-}
-
 int bch2_alloc_write(struct bch_fs *c, unsigned flags, bool *wrote)
 {
         struct btree_trans trans;
         struct btree_iter *iter;
         struct bucket_array *buckets;
         struct bch_dev *ca;
+        struct bucket *g;
+        struct bucket_mark m, new;
+        struct bkey_alloc_unpacked old_u, new_u;
+        __BKEY_PADDED(k, 8) alloc_key; /* hack: */
+        struct bkey_i_alloc *a;
+        struct bkey_s_c k;
         unsigned i;
         size_t b;
         int ret = 0;
 
+        BUG_ON(BKEY_ALLOC_VAL_U64s_MAX > 8);
+
         bch2_trans_init(&trans, c);
 
         iter = bch2_trans_get_iter(&trans, BTREE_ID_ALLOC, POS_MIN,
                                    BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
 
         for_each_rw_member(ca, c, i) {
-relock:
                 down_read(&ca->bucket_lock);
+restart:
                 buckets = bucket_array(ca);
 
                 for (b = buckets->first_bucket;
@@ -435,27 +345,70 @@ int bch2_alloc_write(struct bch_fs *c, unsigned flags, bool *wrote)
                         if (!buckets->b[b].mark.dirty)
                                 continue;
 
+                        bch2_btree_iter_set_pos(iter, POS(i, b));
+                        k = bch2_btree_iter_peek_slot(iter);
+                        ret = bkey_err(k);
+                        if (ret)
+                                goto err;
+
+                        old_u = bch2_alloc_unpack(k);
+
+                        percpu_down_read(&c->mark_lock);
+                        g = bucket(ca, b);
+                        m = READ_ONCE(g->mark);
+                        new_u = alloc_mem_to_key(g, m);
+                        percpu_up_read(&c->mark_lock);
+
+                        if (!m.dirty)
+                                continue;
+
                         if ((flags & BTREE_INSERT_LAZY_RW) &&
                             percpu_ref_is_zero(&c->writes)) {
                                 up_read(&ca->bucket_lock);
                                 bch2_trans_unlock(&trans);
 
                                 ret = bch2_fs_read_write_early(c);
+                                down_read(&ca->bucket_lock);
+
                                 if (ret)
-                                        goto out;
-                                goto relock;
+                                        goto err;
+                                goto restart;
                         }
 
-                        ret = __bch2_alloc_write_key(&trans, ca, b,
-                                                     iter, flags);
+                        a = bkey_alloc_init(&alloc_key.k);
+                        a->k.p = iter->pos;
+
+                        bch2_alloc_pack(a, new_u);
+
+                        bch2_trans_update(&trans, BTREE_INSERT_ENTRY(iter, &a->k_i));
+                        ret = bch2_trans_commit(&trans, NULL, NULL,
+                                                BTREE_INSERT_NOFAIL|
+                                                BTREE_INSERT_NOMARK|
+                                                flags);
+err:
+                        if (ret && !test_bit(BCH_FS_EMERGENCY_RO, &c->flags)) {
+                                bch_err(c, "error %i writing alloc info", ret);
+                                printk(KERN_CONT "dev %llu bucket %llu\n",
+                                       iter->pos.inode, iter->pos.offset);
+                                printk(KERN_CONT "gen %u -> %u\n", old_u.gen, new_u.gen);
+#define x(_name, _bits)         printk(KERN_CONT #_name " %u -> %u\n", old_u._name, new_u._name);
+                                BCH_ALLOC_FIELDS()
+#undef x
+                        }
                         if (ret)
                                 break;
 
+                        new = m;
+                        new.dirty = false;
+                        atomic64_cmpxchg(&g->_mark.v, m.v.counter, new.v.counter);
+
+                        if (ca->buckets_written)
+                                set_bit(b, ca->buckets_written);
+
                         bch2_trans_cond_resched(&trans);
                         *wrote = true;
                 }
                 up_read(&ca->bucket_lock);
-out:
                 if (ret) {
                         percpu_ref_put(&ca->io_ref);
                         break;
@@ -922,6 +875,7 @@ static int bch2_invalidate_one_bucket2(struct btree_trans *trans,
         struct bch_fs *c = trans->c;
         struct bkey_i_alloc *a;
         struct bkey_alloc_unpacked u;
+        struct bucket *g;
         struct bucket_mark m;
         struct bkey_s_c k;
         bool invalidating_cached_data;
@@ -941,7 +895,6 @@ static int bch2_invalidate_one_bucket2(struct btree_trans *trans,
         BUG_ON(!fifo_push(&ca->free_inc, b));
 
         bch2_mark_alloc_bucket(c, ca, b, true, gc_pos_alloc(c, NULL), 0);
-        m = bucket(ca, b)->mark;
 
         spin_unlock(&c->freelist_lock);
         percpu_up_read(&c->mark_lock);
@@ -955,27 +908,26 @@ static int bch2_invalidate_one_bucket2(struct btree_trans *trans,
         if (ret)
                 return ret;
 
-        if (k.k && k.k->type == KEY_TYPE_alloc)
-                u = bch2_alloc_unpack(bkey_s_c_to_alloc(k).v);
-        else
-                memset(&u, 0, sizeof(u));
+        /*
+         * The allocator has to start before journal replay is finished - thus,
+         * we have to trust the in memory bucket @m, not the version in the
+         * btree:
+         */
+        percpu_down_read(&c->mark_lock);
+        g = bucket(ca, b);
+        m = READ_ONCE(g->mark);
+        u = alloc_mem_to_key(g, m);
+        percpu_up_read(&c->mark_lock);
 
         invalidating_cached_data = m.cached_sectors != 0;
 
+        u.gen++;
         u.data_type     = 0;
         u.dirty_sectors = 0;
         u.cached_sectors = 0;
         u.read_time     = c->bucket_clock[READ].hand;
         u.write_time    = c->bucket_clock[WRITE].hand;
 
-        /*
-         * The allocator has to start before journal replay is finished - thus,
-         * we have to trust the in memory bucket @m, not the version in the
-         * btree:
-         */
-        //BUG_ON(u.dirty_sectors);
-        u.gen = m.gen + 1;
-
         a = bkey_alloc_init(&alloc_key.k);
         a->k.p = iter->pos;
         bch2_alloc_pack(a, u);
......
@@ -13,7 +13,7 @@ struct bkey_alloc_unpacked {
 #undef x
 };
 
-struct bkey_alloc_unpacked bch2_alloc_unpack(const struct bch_alloc *);
+struct bkey_alloc_unpacked bch2_alloc_unpack(struct bkey_s_c);
 void bch2_alloc_pack(struct bkey_i_alloc *,
                      const struct bkey_alloc_unpacked);
......
@@ -649,9 +649,13 @@ static int bch2_mark_alloc(struct bch_fs *c, struct bkey_s_c k,
         if (flags & BCH_BUCKET_MARK_GC)
                 return 0;
 
-        u = bch2_alloc_unpack(bkey_s_c_to_alloc(k).v);
         ca = bch_dev_bkey_exists(c, k.k->p.inode);
+
+        if (k.k->p.offset >= ca->mi.nbuckets)
+                return 0;
+
         g = __bucket(ca, k.k->p.offset, gc);
+        u = bch2_alloc_unpack(k);
 
         old = bucket_data_cmpxchg(c, ca, fs_usage, g, m, ({
                 m.gen = u.gen;
@@ -1381,7 +1385,7 @@ static int bch2_trans_mark_pointer(struct btree_trans *trans,
                 goto out;
         }
 
-        u = bch2_alloc_unpack(bkey_s_c_to_alloc(k).v);
+        u = bch2_alloc_unpack(k);
 
         if (gen_after(u.gen, p.ptr.gen)) {
                 ret = 1;
......
@@ -1234,11 +1234,6 @@ int bch2_stripes_write(struct bch_fs *c, unsigned flags, bool *wrote)
         return ret;
 }
 
-static void bch2_stripe_read_key(struct bch_fs *c, struct bkey_s_c k)
-{
-        bch2_mark_key(c, k, true, 0, NULL, 0, 0);
-}
-
 int bch2_stripes_read(struct bch_fs *c, struct journal_keys *journal_keys)
 {
         struct journal_key *i;
@@ -1254,7 +1249,7 @@ int bch2_stripes_read(struct bch_fs *c, struct journal_keys *journal_keys)
         bch2_trans_init(&trans, c);
 
         for_each_btree_key(&trans, iter, BTREE_ID_EC, POS_MIN, 0, k, ret)
-                bch2_stripe_read_key(c, k);
+                bch2_mark_key(c, k, true, 0, NULL, 0, 0);
 
         ret = bch2_trans_exit(&trans) ?: ret;
         if (ret) {
@@ -1264,7 +1259,8 @@ int bch2_stripes_read(struct bch_fs *c, struct journal_keys *journal_keys)
 
         for_each_journal_key(*journal_keys, i)
                 if (i->btree_id == BTREE_ID_EC)
-                        bch2_stripe_read_key(c, bkey_i_to_s_c(i->k));
+                        bch2_mark_key(c, bkey_i_to_s_c(i->k),
+                                      true, 0, NULL, 0, 0);
 
         return 0;
 }
......
@@ -947,7 +947,6 @@ static void journal_write_done(struct closure *cl)
         return;
 err:
         bch2_fatal_error(c);
-        bch2_journal_halt(j);
         spin_lock(&j->lock);
         goto out;
 }
@@ -1059,7 +1058,6 @@ void bch2_journal_write(struct closure *cl)
         spin_unlock(&j->lock);
 
         if (ret) {
-                bch2_journal_halt(j);
                 bch_err(c, "Unable to allocate journal write");
                 bch2_fatal_error(c);
                 continue_at(cl, journal_write_done, system_highpri_wq);
......
@@ -198,17 +198,14 @@ static void __bch2_fs_read_only(struct bch_fs *c)
         do {
                 wrote = false;
 
-                ret = bch2_stripes_write(c, BTREE_INSERT_NOCHECK_RW, &wrote);
-                if (ret) {
-                        bch2_fs_inconsistent(c, "error writing out stripes");
-                        break;
-                }
+                ret = bch2_stripes_write(c, BTREE_INSERT_NOCHECK_RW, &wrote) ?:
+                        bch2_alloc_write(c, BTREE_INSERT_NOCHECK_RW, &wrote);
 
-                ret = bch2_alloc_write(c, BTREE_INSERT_NOCHECK_RW, &wrote);
-                if (ret) {
+                if (ret && !test_bit(BCH_FS_EMERGENCY_RO, &c->flags))
                         bch2_fs_inconsistent(c, "error writing out alloc info %i", ret);
+
+                if (ret)
                         break;
-                }
 
                 for_each_member_device(ca, c, i)
                         bch2_dev_allocator_quiesce(c, ca);
......
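The central API change in this commit is that bch2_alloc_unpack() now takes a struct bkey_s_c instead of a const struct bch_alloc *, and returns a zeroed struct bkey_alloc_unpacked when the key is not KEY_TYPE_alloc, so callers such as bch2_mark_alloc(), bch2_trans_mark_pointer() and bch2_invalidate_one_bucket2() no longer open-code the type check. A minimal before/after caller sketch, assuming a struct bkey_s_c k obtained from a btree iterator as in the hunks above (illustrative only, not code taken from the commit):

        /* Before: the caller checked the key type itself. */
        struct bkey_alloc_unpacked u;

        if (k.k && k.k->type == KEY_TYPE_alloc)
                u = bch2_alloc_unpack(bkey_s_c_to_alloc(k).v);
        else
                memset(&u, 0, sizeof(u));

        /* After: the type check lives inside bch2_alloc_unpack(). */
        struct bkey_alloc_unpacked u = bch2_alloc_unpack(k);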