Commit 719fe7fb authored by Kent Overstreet, committed by Kent Overstreet

bcachefs: Update transactional triggers interface to pass old & new keys

This is needed to fix a bug where we're overflowing iterators within a
btree transaction, because we're updating the stripes btree (to update
block counts) and the stripes btree trigger is unnecessarily updating
the alloc btree - it doesn't need to update the alloc btree when the
pointers within a stripe aren't changing.
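
As a rough sketch of the new calling convention (mirroring the call sites updated in the patch; the old/new variable names here are purely illustrative), callers now pass both the key being overwritten and the key being inserted, using bkey_s_c_null for whichever side doesn't exist:

    /* illustrative: insert where no key previously existed */
    ret = bch2_trans_mark_key(trans, bkey_s_c_null, bkey_i_to_s_c(new),
                              0, new->k.size, BTREE_TRIGGER_INSERT);

    /* illustrative: delete/overwrite of an existing key */
    ret = bch2_trans_mark_key(trans, bkey_i_to_s_c(old), bkey_s_c_null,
                              0, -((s64) old->k.size), BTREE_TRIGGER_OVERWRITE);

With both keys available, a trigger such as bch2_trans_mark_stripe() can compare the old and new stripe and skip the alloc btree updates entirely when the pointers haven't changed.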
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent 66bddc6c
@@ -519,14 +519,18 @@ static int btree_update_nodes_written_trans(struct btree_trans *trans,
trans->journal_pin = &as->journal;
for_each_keylist_key(&as->new_keys, k) {
ret = bch2_trans_mark_key(trans, bkey_i_to_s_c(k),
ret = bch2_trans_mark_key(trans,
bkey_s_c_null,
bkey_i_to_s_c(k),
0, 0, BTREE_TRIGGER_INSERT);
if (ret)
return ret;
}
for_each_keylist_key(&as->old_keys, k) {
ret = bch2_trans_mark_key(trans, bkey_i_to_s_c(k),
ret = bch2_trans_mark_key(trans,
bkey_i_to_s_c(k),
bkey_s_c_null,
0, 0, BTREE_TRIGGER_OVERWRITE);
if (ret)
return ret;
@@ -1338,10 +1338,8 @@ static int bch2_mark_key_locked(struct bch_fs *c,
ret = bch2_mark_stripe(c, old, new, fs_usage, journal_seq, flags);
break;
case KEY_TYPE_inode:
if (!(flags & BTREE_TRIGGER_OVERWRITE))
fs_usage->nr_inodes++;
else
fs_usage->nr_inodes--;
fs_usage->nr_inodes += new.k->type == KEY_TYPE_inode;
fs_usage->nr_inodes -= old.k->type == KEY_TYPE_inode;
break;
case KEY_TYPE_reservation: {
unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas;
@@ -1405,10 +1403,10 @@ int bch2_mark_update(struct btree_trans *trans,
old = (struct bkey_s_c) { &unpacked, NULL };
if (!btree_node_type_is_extents(iter->btree_id)) {
/* iterators should be uptodate, shouldn't get errors here: */
if (btree_iter_type(iter) != BTREE_ITER_CACHED) {
_old = bch2_btree_node_iter_peek(&node_iter, b);
if (_old)
old = bkey_disassemble(b, _old, &unpacked);
old = bch2_btree_iter_peek_slot(iter);
BUG_ON(bkey_err(old));
} else {
struct bkey_cached *ck = (void *) iter->l[0].b;
@@ -1753,59 +1751,92 @@ static int bch2_trans_mark_extent(struct btree_trans *trans,
return 0;
}
static int bch2_trans_mark_stripe_alloc_ref(struct btree_trans *trans,
const struct bch_extent_ptr *ptr,
s64 sectors, bool parity)
{
struct bkey_i_alloc *a;
struct btree_iter *iter;
struct bkey_alloc_unpacked u;
int ret;
ret = bch2_trans_start_alloc_update(trans, &iter, ptr, &u);
if (ret)
return ret;
if (parity) {
u.dirty_sectors += sectors;
u.data_type = u.dirty_sectors
? BCH_DATA_parity
: 0;
}
a = bch2_trans_kmalloc(trans, BKEY_ALLOC_U64s_MAX * 8);
ret = PTR_ERR_OR_ZERO(a);
if (ret)
goto err;
bkey_alloc_init(&a->k_i);
a->k.p = iter->pos;
bch2_alloc_pack(a, u);
bch2_trans_update(trans, iter, &a->k_i, 0);
err:
bch2_trans_iter_put(trans, iter);
return ret;
}
static int bch2_trans_mark_stripe(struct btree_trans *trans,
struct bkey_s_c k,
struct bkey_s_c old, struct bkey_s_c new,
unsigned flags)
{
const struct bch_stripe *s = bkey_s_c_to_stripe(k).v;
unsigned nr_data = s->nr_blocks - s->nr_redundant;
const struct bch_stripe *old_s = old.k->type == KEY_TYPE_stripe
? bkey_s_c_to_stripe(old).v : NULL;
const struct bch_stripe *new_s = new.k->type == KEY_TYPE_stripe
? bkey_s_c_to_stripe(new).v : NULL;
struct bch_replicas_padded r;
struct bkey_alloc_unpacked u;
struct bkey_i_alloc *a;
struct btree_iter *iter;
bool deleting = flags & BTREE_TRIGGER_OVERWRITE;
s64 sectors = le16_to_cpu(s->sectors);
unsigned i;
int ret = 0;
if (deleting)
sectors = -sectors;
bch2_bkey_to_replicas(&r.e, k);
update_replicas_list(trans, &r.e, sectors * s->nr_redundant);
/*
* The allocator code doesn't necessarily update bucket gens in the
* btree when incrementing them, right before handing out new buckets -
* we just need to persist those updates here along with the new stripe:
* If the pointers aren't changing, we don't need to do anything:
*/
if (new_s && old_s &&
!memcmp(old_s->ptrs, new_s->ptrs,
new_s->nr_blocks * sizeof(struct bch_extent_ptr)))
return 0;
for (i = 0; i < s->nr_blocks && !ret; i++) {
bool parity = i >= nr_data;
if (new_s) {
unsigned nr_data = new_s->nr_blocks - new_s->nr_redundant;
s64 sectors = le16_to_cpu(new_s->sectors);
ret = bch2_trans_start_alloc_update(trans, &iter,
&s->ptrs[i], &u);
if (ret)
break;
bch2_bkey_to_replicas(&r.e, new);
update_replicas_list(trans, &r.e, sectors * new_s->nr_redundant);
if (parity) {
u.dirty_sectors += sectors;
u.data_type = u.dirty_sectors
? BCH_DATA_parity
: 0;
for (i = 0; i < new_s->nr_blocks; i++) {
bool parity = i >= nr_data;
ret = bch2_trans_mark_stripe_alloc_ref(trans,
&new_s->ptrs[i], sectors, parity);
if (ret)
return ret;
}
}
a = bch2_trans_kmalloc(trans, BKEY_ALLOC_U64s_MAX * 8);
ret = PTR_ERR_OR_ZERO(a);
if (ret)
goto put_iter;
bkey_alloc_init(&a->k_i);
a->k.p = iter->pos;
bch2_alloc_pack(a, u);
bch2_trans_update(trans, iter, &a->k_i, 0);
put_iter:
bch2_trans_iter_put(trans, iter);
if (old_s) {
unsigned nr_data = old_s->nr_blocks - old_s->nr_redundant;
s64 sectors = -((s64) le16_to_cpu(old_s->sectors));
bch2_bkey_to_replicas(&r.e, old);
update_replicas_list(trans, &r.e, sectors * old_s->nr_redundant);
for (i = 0; i < old_s->nr_blocks; i++) {
bool parity = i >= nr_data;
ret = bch2_trans_mark_stripe_alloc_ref(trans,
&old_s->ptrs[i], sectors, parity);
if (ret)
return ret;
}
}
return ret;
@@ -1904,11 +1935,16 @@ static int bch2_trans_mark_reflink_p(struct btree_trans *trans,
return ret;
}
int bch2_trans_mark_key(struct btree_trans *trans, struct bkey_s_c k,
int bch2_trans_mark_key(struct btree_trans *trans,
struct bkey_s_c old,
struct bkey_s_c new,
unsigned offset, s64 sectors, unsigned flags)
{
struct replicas_delta_list *d;
struct bch_fs *c = trans->c;
struct bkey_s_c k = flags & BTREE_TRIGGER_INSERT ? new : old;
struct replicas_delta_list *d;
BUG_ON(!(flags & (BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE)));
switch (k.k->type) {
case KEY_TYPE_btree_ptr:
@@ -1924,15 +1960,18 @@ int bch2_trans_mark_key(struct btree_trans *trans, struct bkey_s_c k,
return bch2_trans_mark_extent(trans, k, offset, sectors,
flags, BCH_DATA_user);
case KEY_TYPE_stripe:
return bch2_trans_mark_stripe(trans, k, flags);
case KEY_TYPE_inode:
d = replicas_deltas_realloc(trans, 0);
return bch2_trans_mark_stripe(trans, old, new, flags);
case KEY_TYPE_inode: {
int nr = (new.k->type == KEY_TYPE_inode) -
(old.k->type == KEY_TYPE_inode);
if (nr) {
d = replicas_deltas_realloc(trans, 0);
d->nr_inodes += nr;
}
if (!(flags & BTREE_TRIGGER_OVERWRITE))
d->nr_inodes++;
else
d->nr_inodes--;
return 0;
}
case KEY_TYPE_reservation: {
unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas;
@@ -1956,12 +1995,10 @@ int bch2_trans_mark_key(struct btree_trans *trans, struct bkey_s_c k,
int bch2_trans_mark_update(struct btree_trans *trans,
struct btree_iter *iter,
struct bkey_i *insert,
struct bkey_i *new,
unsigned flags)
{
struct btree *b = iter_l(iter)->b;
struct btree_node_iter node_iter = iter_l(iter)->iter;
struct bkey_packed *_k;
struct bkey_s_c old;
int ret;
if (unlikely(flags & BTREE_TRIGGER_NORUN))
@@ -1970,68 +2007,93 @@ int bch2_trans_mark_update(struct btree_trans *trans,
if (!btree_node_type_needs_gc(iter->btree_id))
return 0;
ret = bch2_trans_mark_key(trans, bkey_i_to_s_c(insert),
0, insert->k.size, BTREE_TRIGGER_INSERT);
if (ret)
return ret;
if (btree_iter_type(iter) == BTREE_ITER_CACHED) {
struct bkey_cached *ck = (void *) iter->l[0].b;
if (!btree_node_type_is_extents(iter->btree_id)) {
/* iterators should be uptodate, shouldn't get errors here: */
if (btree_iter_type(iter) != BTREE_ITER_CACHED) {
old = bch2_btree_iter_peek_slot(iter);
BUG_ON(bkey_err(old));
} else {
struct bkey_cached *ck = (void *) iter->l[0].b;
return bch2_trans_mark_key(trans, bkey_i_to_s_c(ck->k),
0, 0, BTREE_TRIGGER_OVERWRITE);
}
BUG_ON(!ck->valid);
old = bkey_i_to_s_c(ck->k);
}
while ((_k = bch2_btree_node_iter_peek(&node_iter, b))) {
if (old.k->type == new->k.type) {
ret = bch2_trans_mark_key(trans, old, bkey_i_to_s_c(new), 0, 0,
BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE|flags);
} else {
ret = bch2_trans_mark_key(trans, old, bkey_i_to_s_c(new), 0, 0,
BTREE_TRIGGER_INSERT|flags) ?:
bch2_trans_mark_key(trans, old, bkey_i_to_s_c(new), 0, 0,
BTREE_TRIGGER_OVERWRITE|flags);
}
} else {
struct btree *b = iter_l(iter)->b;
struct btree_node_iter node_iter = iter_l(iter)->iter;
struct bkey_packed *_old;
struct bkey unpacked;
struct bkey_s_c k;
unsigned offset = 0;
s64 sectors = 0;
unsigned flags = BTREE_TRIGGER_OVERWRITE;
k = bkey_disassemble(b, _k, &unpacked);
EBUG_ON(btree_iter_type(iter) == BTREE_ITER_CACHED);
if (btree_node_is_extents(b)
? bkey_cmp(insert->k.p, bkey_start_pos(k.k)) <= 0
: bkey_cmp(insert->k.p, k.k->p))
break;
bkey_init(&unpacked);
old = (struct bkey_s_c) { &unpacked, NULL };
ret = bch2_trans_mark_key(trans, old, bkey_i_to_s_c(new),
0, new->k.size,
BTREE_TRIGGER_INSERT);
if (ret)
return ret;
while ((_old = bch2_btree_node_iter_peek(&node_iter, b))) {
unsigned flags = BTREE_TRIGGER_OVERWRITE;
unsigned offset = 0;
s64 sectors;
old = bkey_disassemble(b, _old, &unpacked);
sectors = -((s64) old.k->size);
flags |= BTREE_TRIGGER_OVERWRITE;
if (bkey_cmp(new->k.p, bkey_start_pos(old.k)) <= 0)
return 0;
if (btree_node_is_extents(b)) {
switch (bch2_extent_overlap(&insert->k, k.k)) {
switch (bch2_extent_overlap(&new->k, old.k)) {
case BCH_EXTENT_OVERLAP_ALL:
offset = 0;
sectors = -((s64) k.k->size);
sectors = -((s64) old.k->size);
break;
case BCH_EXTENT_OVERLAP_BACK:
offset = bkey_start_offset(&insert->k) -
bkey_start_offset(k.k);
sectors = bkey_start_offset(&insert->k) -
k.k->p.offset;
offset = bkey_start_offset(&new->k) -
bkey_start_offset(old.k);
sectors = bkey_start_offset(&new->k) -
old.k->p.offset;
break;
case BCH_EXTENT_OVERLAP_FRONT:
offset = 0;
sectors = bkey_start_offset(k.k) -
insert->k.p.offset;
sectors = bkey_start_offset(old.k) -
new->k.p.offset;
break;
case BCH_EXTENT_OVERLAP_MIDDLE:
offset = bkey_start_offset(&insert->k) -
bkey_start_offset(k.k);
sectors = -((s64) insert->k.size);
offset = bkey_start_offset(&new->k) -
bkey_start_offset(old.k);
sectors = -((s64) new->k.size);
flags |= BTREE_TRIGGER_OVERWRITE_SPLIT;
break;
}
BUG_ON(sectors >= 0);
}
ret = bch2_trans_mark_key(trans, k, offset, sectors, flags);
if (ret)
return ret;
ret = bch2_trans_mark_key(trans, old, bkey_i_to_s_c(new),
offset, sectors, flags);
if (ret)
return ret;
bch2_btree_node_iter_advance(&node_iter, b);
bch2_btree_node_iter_advance(&node_iter, b);
}
}
return 0;
return ret;
}
/* Disk reservations: */
@@ -270,7 +270,7 @@ int bch2_mark_update(struct btree_trans *, struct btree_iter *,
int bch2_replicas_delta_list_apply(struct bch_fs *,
struct bch_fs_usage *,
struct replicas_delta_list *);
int bch2_trans_mark_key(struct btree_trans *, struct bkey_s_c,
int bch2_trans_mark_key(struct btree_trans *, struct bkey_s_c, struct bkey_s_c,
unsigned, s64, unsigned);
int bch2_trans_mark_update(struct btree_trans *, struct btree_iter *iter,
struct bkey_i *insert, unsigned);
@@ -458,7 +458,9 @@ static int bch2_extent_replay_key(struct bch_fs *c, enum btree_id btree_id,
bch2_btree_iter_set_pos(iter, split->k.p);
if (remark) {
ret = bch2_trans_mark_key(&trans, bkey_i_to_s_c(split),
ret = bch2_trans_mark_key(&trans,
bkey_s_c_null,
bkey_i_to_s_c(split),
0, split->k.size,
BTREE_TRIGGER_INSERT);
if (ret)
@@ -467,7 +469,9 @@ static int bch2_extent_replay_key(struct bch_fs *c, enum btree_id btree_id,
} while (bkey_cmp(iter->pos, k->k.p) < 0);
if (remark) {
ret = bch2_trans_mark_key(&trans, bkey_i_to_s_c(k),
ret = bch2_trans_mark_key(&trans,
bkey_i_to_s_c(k),
bkey_s_c_null,
0, -((s64) k->k.size),
BTREE_TRIGGER_OVERWRITE);
if (ret)