Commit a111901f authored by Kent Overstreet's avatar Kent Overstreet

bcachefs: bch2_propagate_key_to_snapshot_leaves()

If fsck finds a key that needs work done, the primary example being an
unlinked inode that needs to be deleted, and the key is in an internal
snapshot node, we have a bit of a conundrum.

The conundrum is that internal snapshot nodes are shared, and we in
general do updates in internal snapshot nodes because there may be
overwrites in some snapshots and not others, and this may affect other
keys referenced by this key (i.e. extents).

For example, we might be seeing an unlinked inode in an internal
snapshot node, but then in one child snapshot the inode might have been
reattached and might not be unlinked. Deleting the inode in the internal
snapshot node would be wrong, because then we'll delete all the extents
that the child snapshot references.

But if an unlinked inode does not have any overwrites in child
snapshots, we're fine: the inode is overwrritten in all child snapshots,
so we can do the deletion at the point of comonality in the snapshot
tree, i.e. the node where we found it.

This patch adds a new helper, bch2_propagate_key_to_snapshot_leaves(),
to handle the case where we need a to update a key that does have
overwrites in child snapshots: we copy the key to leaf snapshot nodes,
and then rewind fsck and process the needed updates there.

With this, fsck can now always correctly handle unlinked inodes found in
internal snapshot nodes.
Signed-off-by: default avatarKent Overstreet <kent.overstreet@linux.dev>
parent f55d6e07
......@@ -853,14 +853,6 @@ static int check_inode(struct btree_trans *trans,
if (ret)
goto err;
/*
* if snapshot id isn't a leaf node, skip it - deletion in
* particular is not atomic, so on the internal snapshot nodes
* we can see inodes marked for deletion after a clean shutdown
*/
if (bch2_snapshot_is_internal_node(c, k.k->p.snapshot))
return 0;
if (!bkey_is_inode(k.k))
return 0;
......@@ -882,6 +874,27 @@ static int check_inode(struct btree_trans *trans,
return -EINVAL;
}
if ((u.bi_flags & (BCH_INODE_I_SIZE_DIRTY|BCH_INODE_UNLINKED)) &&
bch2_key_has_snapshot_overwrites(trans, BTREE_ID_inodes, k.k->p)) {
struct bpos new_min_pos;
ret = bch2_propagate_key_to_snapshot_leaves(trans, iter->btree_id, k, &new_min_pos);
if (ret)
goto err;
u.bi_flags &= ~BCH_INODE_I_SIZE_DIRTY|BCH_INODE_UNLINKED;
ret = __write_inode(trans, &u, iter->pos.snapshot);
if (ret) {
bch_err_msg(c, ret, "in fsck: error updating inode");
return ret;
}
if (!bpos_eq(new_min_pos, POS_MIN))
bch2_btree_iter_set_pos(iter, bpos_predecessor(new_min_pos));
return 0;
}
if (u.bi_flags & BCH_INODE_UNLINKED &&
(!c->sb.clean ||
fsck_err(c, "filesystem marked clean, but inode %llu unlinked",
......@@ -960,9 +973,10 @@ static int check_inode(struct btree_trans *trans,
if (do_update) {
ret = __write_inode(trans, &u, iter->pos.snapshot);
if (ret)
bch_err(c, "error in fsck: error updating inode: %s",
bch2_err_str(ret));
if (ret) {
bch_err_msg(c, ret, "in fsck: error updating inode");
return ret;
}
}
err:
fsck_err:
......
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
#include "bkey_buf.h"
#include "btree_key_cache.h"
#include "btree_update.h"
#include "buckets.h"
......@@ -1536,7 +1537,7 @@ void bch2_delete_dead_snapshots_async(struct bch_fs *c)
}
int bch2_delete_dead_snapshots_hook(struct btree_trans *trans,
struct btree_trans_commit_hook *h)
struct btree_trans_commit_hook *h)
{
struct bch_fs *c = trans->c;
......@@ -1583,6 +1584,94 @@ int __bch2_key_has_snapshot_overwrites(struct btree_trans *trans,
return ret;
}
static u32 bch2_snapshot_smallest_child(struct bch_fs *c, u32 id)
{
const struct snapshot_t *s = snapshot_t(c, id);
return s->children[1] ?: s->children[0];
}
static u32 bch2_snapshot_smallest_descendent(struct bch_fs *c, u32 id)
{
u32 child;
while ((child = bch2_snapshot_smallest_child(c, id)))
id = child;
return id;
}
static int bch2_propagate_key_to_snapshot_leaf(struct btree_trans *trans,
enum btree_id btree,
struct bkey_s_c interior_k,
u32 leaf_id, struct bpos *new_min_pos)
{
struct btree_iter iter;
struct bpos pos = interior_k.k->p;
struct bkey_s_c k;
struct bkey_i *new;
int ret;
pos.snapshot = leaf_id;
bch2_trans_iter_init(trans, &iter, btree, pos, BTREE_ITER_INTENT);
k = bch2_btree_iter_peek_slot(&iter);
ret = bkey_err(k);
if (ret)
goto out;
/* key already overwritten in this snapshot? */
if (k.k->p.snapshot != interior_k.k->p.snapshot)
goto out;
if (bpos_eq(*new_min_pos, POS_MIN)) {
*new_min_pos = k.k->p;
new_min_pos->snapshot = leaf_id;
}
new = bch2_bkey_make_mut_noupdate(trans, interior_k);
ret = PTR_ERR_OR_ZERO(new);
if (ret)
goto out;
new->k.p.snapshot = leaf_id;
ret = bch2_trans_update(trans, &iter, new, 0);
out:
bch2_trans_iter_exit(trans, &iter);
return ret;
}
int bch2_propagate_key_to_snapshot_leaves(struct btree_trans *trans,
enum btree_id btree,
struct bkey_s_c k,
struct bpos *new_min_pos)
{
struct bch_fs *c = trans->c;
struct bkey_buf sk;
int ret;
bch2_bkey_buf_init(&sk);
bch2_bkey_buf_reassemble(&sk, c, k);
k = bkey_i_to_s_c(sk.k);
*new_min_pos = POS_MIN;
for (u32 id = bch2_snapshot_smallest_descendent(c, k.k->p.snapshot);
id < k.k->p.snapshot;
id++) {
if (!bch2_snapshot_is_ancestor(c, id, k.k->p.snapshot) ||
!bch2_snapshot_is_leaf(c, id))
continue;
ret = commit_do(trans, NULL, NULL, 0,
bch2_propagate_key_to_snapshot_leaf(trans, btree, k, id, new_min_pos));
if (ret)
break;
}
bch2_bkey_buf_exit(&sk, c);
return ret;
}
int bch2_snapshots_read(struct bch_fs *c)
{
struct btree_iter iter;
......
......@@ -263,6 +263,9 @@ static inline int bch2_key_has_snapshot_overwrites(struct btree_trans *trans,
return __bch2_key_has_snapshot_overwrites(trans, id, pos);
}
int bch2_propagate_key_to_snapshot_leaves(struct btree_trans *, enum btree_id,
struct bkey_s_c, struct bpos *);
int bch2_snapshots_read(struct bch_fs *);
void bch2_fs_snapshots_exit(struct bch_fs *);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment