Commit ba6dd1dd authored by Kent Overstreet's avatar Kent Overstreet Committed by Kent Overstreet

bcachefs: Improve stripe triggers/heap code

Soon we'll be able to modify existing stripes - replacing empty blocks
with new blocks and new p/q blocks. This patch updates the trigger code
to handle pointers changing in an existing stripe; also, it
significantly improves how the stripes heap works, which means we can
get rid of the stripe creation/deletion lock.
Signed-off-by: default avatarKent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: default avatarKent Overstreet <kent.overstreet@linux.dev>
parent e63534a2
......@@ -755,7 +755,6 @@ struct bch_fs {
/* STRIPES: */
GENRADIX(struct stripe) stripes[2];
struct mutex ec_stripe_create_lock;
ec_stripes_heap ec_stripes_heap;
spinlock_t ec_stripes_heap_lock;
......
......@@ -619,8 +619,11 @@ static int bch2_gc_done(struct bch_fs *c,
copy_stripe_field(block_sectors[i],
"block_sectors[%u]", i);
if (dst->alive)
if (dst->alive) {
spin_lock(&c->ec_stripes_heap_lock);
bch2_stripes_heap_insert(c, dst, dst_iter.pos);
spin_unlock(&c->ec_stripes_heap_lock);
}
genradix_iter_advance(&dst_iter, &c->stripes[0]);
genradix_iter_advance(&src_iter, &c->stripes[1]);
......
......@@ -884,51 +884,46 @@ static s64 ptr_disk_sectors_delta(struct extent_ptr_decoded p,
}
static void bucket_set_stripe(struct bch_fs *c,
const struct bch_stripe *v,
const struct bch_extent_ptr *ptr,
struct bch_fs_usage *fs_usage,
u64 journal_seq,
unsigned flags,
bool enabled)
{
bool gc = flags & BTREE_TRIGGER_GC;
unsigned i;
struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
struct bucket *g = PTR_BUCKET(ca, ptr, gc);
struct bucket_mark new, old;
for (i = 0; i < v->nr_blocks; i++) {
const struct bch_extent_ptr *ptr = v->ptrs + i;
struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
struct bucket *g = PTR_BUCKET(ca, ptr, gc);
struct bucket_mark new, old;
old = bucket_cmpxchg(g, new, ({
new.stripe = enabled;
if (journal_seq) {
new.journal_seq_valid = 1;
new.journal_seq = journal_seq;
}
}));
old = bucket_cmpxchg(g, new, ({
new.stripe = enabled;
if (journal_seq) {
new.journal_seq_valid = 1;
new.journal_seq = journal_seq;
}
}));
bch2_dev_usage_update(c, ca, fs_usage, old, new, gc);
bch2_dev_usage_update(c, ca, fs_usage, old, new, gc);
/*
* XXX write repair code for these, flag stripe as possibly bad
*/
if (old.gen != ptr->gen)
bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
"stripe with stale pointer");
/*
* XXX write repair code for these, flag stripe as possibly bad
*/
if (old.gen != ptr->gen)
bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
"stripe with stale pointer");
#if 0
/*
* We'd like to check for these, but these checks don't work
* yet:
*/
if (old.stripe && enabled)
bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
"multiple stripes using same bucket");
if (!old.stripe && !enabled)
bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
"deleting stripe but bucket not marked as stripe bucket");
/*
* We'd like to check for these, but these checks don't work
* yet:
*/
if (old.stripe && enabled)
bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
"multiple stripes using same bucket");
if (!old.stripe && !enabled)
bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
"deleting stripe but bucket not marked as stripe bucket");
#endif
}
}
static int __mark_pointer(struct bch_fs *c, struct bkey_s_c k,
......@@ -1070,8 +1065,7 @@ static int bch2_mark_stripe_ptr(struct bch_fs *c,
{
bool gc = flags & BTREE_TRIGGER_GC;
struct stripe *m;
unsigned old, new;
int blocks_nonempty_delta;
unsigned i, blocks_nonempty = 0;
m = genradix_ptr(&c->stripes[gc], p.idx);
......@@ -1090,20 +1084,17 @@ static int bch2_mark_stripe_ptr(struct bch_fs *c,
*nr_parity = m->nr_redundant;
*r = m->r;
old = m->block_sectors[p.block];
m->block_sectors[p.block] += sectors;
new = m->block_sectors[p.block];
blocks_nonempty_delta = (int) !!new - (int) !!old;
if (blocks_nonempty_delta) {
m->blocks_nonempty += blocks_nonempty_delta;
for (i = 0; i < m->nr_blocks; i++)
blocks_nonempty += m->block_sectors[i] != 0;
if (m->blocks_nonempty != blocks_nonempty) {
m->blocks_nonempty = blocks_nonempty;
if (!gc)
bch2_stripes_heap_update(c, m, p.idx);
}
m->dirty = true;
spin_unlock(&c->ec_stripes_heap_lock);
return 0;
......@@ -1207,10 +1198,11 @@ static int bch2_mark_stripe(struct bch_fs *c,
if (!new_s) {
/* Deleting: */
bucket_set_stripe(c, old_s, fs_usage,
journal_seq, flags, false);
for (i = 0; i < old_s->nr_blocks; i++)
bucket_set_stripe(c, old_s->ptrs + i, fs_usage,
journal_seq, flags, false);
if (!gc) {
if (!gc && m->on_heap) {
spin_lock(&c->ec_stripes_heap_lock);
bch2_stripes_heap_del(c, m, idx);
spin_unlock(&c->ec_stripes_heap_lock);
......@@ -1221,10 +1213,21 @@ static int bch2_mark_stripe(struct bch_fs *c,
BUG_ON(old_s && new_s->nr_blocks != old_s->nr_blocks);
BUG_ON(old_s && new_s->nr_redundant != old_s->nr_redundant);
if (!old_s)
bucket_set_stripe(c, new_s, fs_usage,
journal_seq, flags, true);
for (i = 0; i < new_s->nr_blocks; i++) {
if (!old_s ||
memcmp(new_s->ptrs + i,
old_s->ptrs + i,
sizeof(struct bch_extent_ptr))) {
if (old_s)
bucket_set_stripe(c, old_s->ptrs + i, fs_usage,
journal_seq, flags, false);
bucket_set_stripe(c, new_s->ptrs + i, fs_usage,
journal_seq, flags, true);
}
}
m->alive = true;
m->sectors = le16_to_cpu(new_s->sectors);
m->algorithm = new_s->algorithm;
m->nr_blocks = new_s->nr_blocks;
......@@ -1248,8 +1251,6 @@ static int bch2_mark_stripe(struct bch_fs *c,
bch2_stripes_heap_update(c, m, idx);
spin_unlock(&c->ec_stripes_heap_lock);
}
m->alive = true;
}
return 0;
......
......@@ -607,39 +607,16 @@ static void heap_verify_backpointer(struct bch_fs *c, size_t idx)
BUG_ON(h->data[m->heap_idx].idx != idx);
}
void bch2_stripes_heap_update(struct bch_fs *c,
struct stripe *m, size_t idx)
{
ec_stripes_heap *h = &c->ec_stripes_heap;
size_t i;
if (m->alive) {
heap_verify_backpointer(c, idx);
h->data[m->heap_idx].blocks_nonempty = m->blocks_nonempty;
i = m->heap_idx;
heap_sift_up(h, i, ec_stripes_heap_cmp,
ec_stripes_heap_set_backpointer);
heap_sift_down(h, i, ec_stripes_heap_cmp,
ec_stripes_heap_set_backpointer);
heap_verify_backpointer(c, idx);
} else {
bch2_stripes_heap_insert(c, m, idx);
}
if (stripe_idx_to_delete(c) >= 0 &&
!percpu_ref_is_dying(&c->writes))
schedule_work(&c->ec_stripe_delete_work);
}
void bch2_stripes_heap_del(struct bch_fs *c,
struct stripe *m, size_t idx)
{
if (!m->on_heap)
return;
m->on_heap = false;
heap_verify_backpointer(c, idx);
m->alive = false;
heap_del(&c->ec_stripes_heap, m->heap_idx,
ec_stripes_heap_cmp,
ec_stripes_heap_set_backpointer);
......@@ -648,19 +625,49 @@ void bch2_stripes_heap_del(struct bch_fs *c,
void bch2_stripes_heap_insert(struct bch_fs *c,
struct stripe *m, size_t idx)
{
if (m->on_heap)
return;
BUG_ON(heap_full(&c->ec_stripes_heap));
m->on_heap = true;
heap_add(&c->ec_stripes_heap, ((struct ec_stripe_heap_entry) {
.idx = idx,
.blocks_nonempty = m->blocks_nonempty,
}),
ec_stripes_heap_cmp,
ec_stripes_heap_set_backpointer);
m->alive = true;
heap_verify_backpointer(c, idx);
}
void bch2_stripes_heap_update(struct bch_fs *c,
struct stripe *m, size_t idx)
{
ec_stripes_heap *h = &c->ec_stripes_heap;
size_t i;
if (!m->on_heap)
return;
heap_verify_backpointer(c, idx);
h->data[m->heap_idx].blocks_nonempty = m->blocks_nonempty;
i = m->heap_idx;
heap_sift_up(h, i, ec_stripes_heap_cmp,
ec_stripes_heap_set_backpointer);
heap_sift_down(h, i, ec_stripes_heap_cmp,
ec_stripes_heap_set_backpointer);
heap_verify_backpointer(c, idx);
if (stripe_idx_to_delete(c) >= 0 &&
!percpu_ref_is_dying(&c->writes))
schedule_work(&c->ec_stripe_delete_work);
}
/* stripe deletion */
static int ec_stripe_delete(struct bch_fs *c, size_t idx)
......@@ -677,23 +684,20 @@ static void ec_stripe_delete_work(struct work_struct *work)
container_of(work, struct bch_fs, ec_stripe_delete_work);
ssize_t idx;
down_read(&c->gc_lock);
mutex_lock(&c->ec_stripe_create_lock);
while (1) {
spin_lock(&c->ec_stripes_heap_lock);
idx = stripe_idx_to_delete(c);
spin_unlock(&c->ec_stripes_heap_lock);
if (idx < 0)
if (idx < 0) {
spin_unlock(&c->ec_stripes_heap_lock);
break;
}
bch2_stripes_heap_del(c, genradix_ptr(&c->stripes[0], idx), idx);
spin_unlock(&c->ec_stripes_heap_lock);
if (ec_stripe_delete(c, idx))
break;
}
mutex_unlock(&c->ec_stripe_create_lock);
up_read(&c->gc_lock);
}
/* stripe creation: */
......@@ -846,6 +850,7 @@ static void ec_stripe_create(struct ec_stripe_new *s)
struct bch_fs *c = s->c;
struct open_bucket *ob;
struct bkey_i *k;
struct stripe *m;
struct bch_stripe *v = &s->stripe.key.v;
unsigned i, nr_data = v->nr_blocks - v->nr_redundant;
struct closure cl;
......@@ -882,12 +887,10 @@ static void ec_stripe_create(struct ec_stripe_new *s)
goto err_put_writes;
}
mutex_lock(&c->ec_stripe_create_lock);
ret = ec_stripe_bkey_insert(c, &s->stripe.key);
if (ret) {
bch_err(c, "error creating stripe: error creating stripe key");
goto err_unlock;
goto err_put_writes;
}
for_each_keylist_key(&s->keys, k) {
......@@ -896,8 +899,11 @@ static void ec_stripe_create(struct ec_stripe_new *s)
break;
}
err_unlock:
mutex_unlock(&c->ec_stripe_create_lock);
spin_lock(&c->ec_stripes_heap_lock);
m = genradix_ptr(&c->stripes[0], s->stripe.key.k.p.offset);
BUG_ON(m->on_heap);
bch2_stripes_heap_insert(c, m, s->stripe.key.k.p.offset);
spin_unlock(&c->ec_stripes_heap_lock);
err_put_writes:
percpu_ref_put(&c->writes);
err:
......@@ -1280,11 +1286,21 @@ static int bch2_stripes_read_fn(struct bch_fs *c, enum btree_id id,
{
int ret = 0;
if (k.k->type == KEY_TYPE_stripe)
if (k.k->type == KEY_TYPE_stripe) {
struct stripe *m;
ret = __ec_stripe_mem_alloc(c, k.k->p.offset, GFP_KERNEL) ?:
bch2_mark_key(c, k, 0, 0, NULL, 0,
BTREE_TRIGGER_ALLOC_READ|
BTREE_TRIGGER_NOATOMIC);
if (ret)
return ret;
spin_lock(&c->ec_stripes_heap_lock);
m = genradix_ptr(&c->stripes[0], k.k->p.offset);
bch2_stripes_heap_insert(c, m, k.k->p.offset);
spin_unlock(&c->ec_stripes_heap_lock);
}
return ret;
}
......@@ -1335,6 +1351,24 @@ int bch2_ec_mem_alloc(struct bch_fs *c, bool gc)
return 0;
}
void bch2_stripes_heap_to_text(struct printbuf *out, struct bch_fs *c)
{
ec_stripes_heap *h = &c->ec_stripes_heap;
struct stripe *m;
size_t i;
spin_lock(&c->ec_stripes_heap_lock);
for (i = 0; i < min(h->used, 20UL); i++) {
m = genradix_ptr(&c->stripes[0], h->data[i].idx);
pr_buf(out, "%zu %u/%u+%u\n", h->data[i].idx,
h->data[i].blocks_nonempty,
m->nr_blocks - m->nr_redundant,
m->nr_redundant);
}
spin_unlock(&c->ec_stripes_heap_lock);
}
void bch2_fs_ec_exit(struct bch_fs *c)
{
struct ec_stripe_head *h;
......
......@@ -157,6 +157,8 @@ int bch2_stripes_write(struct bch_fs *, unsigned, bool *);
int bch2_ec_mem_alloc(struct bch_fs *, bool);
void bch2_stripes_heap_to_text(struct printbuf *, struct bch_fs *);
void bch2_fs_ec_exit(struct bch_fs *);
int bch2_fs_ec_init(struct bch_fs *);
......
......@@ -22,6 +22,7 @@ struct stripe {
unsigned alive:1;
unsigned dirty:1;
unsigned on_heap:1;
u8 blocks_nonempty;
u16 block_sectors[EC_STRIPE_MAX];
......
......@@ -678,7 +678,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
INIT_LIST_HEAD(&c->ec_new_stripe_list);
mutex_init(&c->ec_new_stripe_lock);
mutex_init(&c->ec_stripe_create_lock);
spin_lock_init(&c->ec_stripes_heap_lock);
seqcount_init(&c->gc_pos_lock);
......
......@@ -168,6 +168,7 @@ read_attribute(btree_updates);
read_attribute(dirty_btree_nodes);
read_attribute(btree_key_cache);
read_attribute(btree_transactions);
read_attribute(stripes_heap);
read_attribute(internal_uuid);
......@@ -418,6 +419,13 @@ SHOW(bch2_fs)
return out.pos - buf;
}
if (attr == &sysfs_stripes_heap) {
struct printbuf out = _PBUF(buf, PAGE_SIZE);
bch2_stripes_heap_to_text(&out, c);
return out.pos - buf;
}
if (attr == &sysfs_compression_stats)
return bch2_compression_stats(c, buf);
......@@ -583,6 +591,7 @@ struct attribute *bch2_fs_internal_files[] = {
&sysfs_dirty_btree_nodes,
&sysfs_btree_key_cache,
&sysfs_btree_transactions,
&sysfs_stripes_heap,
&sysfs_read_realloc_races,
&sysfs_extent_migrate_done,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment