Commit ef1669ff authored by Kent Overstreet, committed by Kent Overstreet

bcachefs: Update fsck for snapshots

This updates the fsck algorithms to handle snapshots - meaning there
will be multiple versions of the same key (extents, inodes, dirents,
xattrs) in different snapshots, and we have to carefully consider which
keys are visible in which snapshot.
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
parent 6fed42bb
...@@ -61,7 +61,7 @@ int bch2_btree_insert(struct bch_fs *, enum btree_id, struct bkey_i *, ...@@ -61,7 +61,7 @@ int bch2_btree_insert(struct bch_fs *, enum btree_id, struct bkey_i *,
struct disk_reservation *, u64 *, int flags); struct disk_reservation *, u64 *, int flags);
int bch2_btree_delete_range_trans(struct btree_trans *, enum btree_id, int bch2_btree_delete_range_trans(struct btree_trans *, enum btree_id,
struct bpos, struct bpos, u64 *); struct bpos, struct bpos, unsigned, u64 *);
int bch2_btree_delete_range(struct bch_fs *, enum btree_id, int bch2_btree_delete_range(struct bch_fs *, enum btree_id,
struct bpos, struct bpos, u64 *); struct bpos, struct bpos, u64 *);
......
...@@ -1204,13 +1204,14 @@ int bch2_btree_delete_at(struct btree_trans *trans, ...@@ -1204,13 +1204,14 @@ int bch2_btree_delete_at(struct btree_trans *trans,
int bch2_btree_delete_range_trans(struct btree_trans *trans, enum btree_id id, int bch2_btree_delete_range_trans(struct btree_trans *trans, enum btree_id id,
struct bpos start, struct bpos end, struct bpos start, struct bpos end,
unsigned iter_flags,
u64 *journal_seq) u64 *journal_seq)
{ {
struct btree_iter iter; struct btree_iter iter;
struct bkey_s_c k; struct bkey_s_c k;
int ret = 0; int ret = 0;
bch2_trans_iter_init(trans, &iter, id, start, BTREE_ITER_INTENT); bch2_trans_iter_init(trans, &iter, id, start, BTREE_ITER_INTENT|iter_flags);
retry: retry:
while ((bch2_trans_begin(trans), while ((bch2_trans_begin(trans),
(k = bch2_btree_iter_peek(&iter)).k) && (k = bch2_btree_iter_peek(&iter)).k) &&
...@@ -1277,5 +1278,5 @@ int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id, ...@@ -1277,5 +1278,5 @@ int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id,
u64 *journal_seq) u64 *journal_seq)
{ {
return bch2_trans_do(c, NULL, journal_seq, 0, return bch2_trans_do(c, NULL, journal_seq, 0,
bch2_btree_delete_range_trans(&trans, id, start, end, journal_seq)); bch2_btree_delete_range_trans(&trans, id, start, end, 0, journal_seq));
} }
...@@ -18,7 +18,8 @@ ...@@ -18,7 +18,8 @@
#define QSTR(n) { { { .len = strlen(n) } }, .name = n } #define QSTR(n) { { { .len = strlen(n) } }, .name = n }
static s64 bch2_count_inode_sectors(struct btree_trans *trans, u64 inum) static s64 bch2_count_inode_sectors(struct btree_trans *trans, u64 inum,
u32 snapshot)
{ {
struct btree_iter iter; struct btree_iter iter;
struct bkey_s_c k; struct bkey_s_c k;
...@@ -26,7 +27,7 @@ static s64 bch2_count_inode_sectors(struct btree_trans *trans, u64 inum) ...@@ -26,7 +27,7 @@ static s64 bch2_count_inode_sectors(struct btree_trans *trans, u64 inum)
int ret; int ret;
for_each_btree_key(trans, iter, BTREE_ID_extents, for_each_btree_key(trans, iter, BTREE_ID_extents,
POS(inum, 0), 0, k, ret) { SPOS(inum, 0, snapshot), 0, k, ret) {
if (k.k->p.inode != inum) if (k.k->p.inode != inum)
break; break;
...@@ -39,6 +40,33 @@ static s64 bch2_count_inode_sectors(struct btree_trans *trans, u64 inum) ...@@ -39,6 +40,33 @@ static s64 bch2_count_inode_sectors(struct btree_trans *trans, u64 inum)
return ret ?: sectors; return ret ?: sectors;
} }
/*
 * Count the dirents of type DT_DIR stored under directory @inum, with the
 * walk starting at SPOS(@inum, 0, @snapshot).
 *
 * Returns the number of such dirents, or the nonzero status left in the
 * iteration's error variable by for_each_btree_key().
 */
static s64 bch2_count_subdirs(struct btree_trans *trans, u64 inum,
			      u32 snapshot)
{
	struct btree_iter iter;
	struct bkey_s_c k;
	u64 nr_dirs = 0;
	int ret;

	for_each_btree_key(trans, iter, BTREE_ID_dirents,
			   SPOS(inum, 0, snapshot), 0, k, ret) {
		/* Walked past the last key belonging to this directory */
		if (k.k->p.inode != inum)
			break;

		if (k.k->type == KEY_TYPE_dirent &&
		    bkey_s_c_to_dirent(k).v->d_type == DT_DIR)
			nr_dirs++;
	}
	bch2_trans_iter_exit(trans, &iter);

	if (ret)
		return ret;
	return nr_dirs;
}
static int __snapshot_lookup_subvol(struct btree_trans *trans, u32 snapshot, static int __snapshot_lookup_subvol(struct btree_trans *trans, u32 snapshot,
u32 *subvol) u32 *subvol)
{ {
...@@ -72,8 +100,8 @@ static int snapshot_lookup_subvol(struct btree_trans *trans, u32 snapshot, ...@@ -72,8 +100,8 @@ static int snapshot_lookup_subvol(struct btree_trans *trans, u32 snapshot,
return lockrestart_do(trans, __snapshot_lookup_subvol(trans, snapshot, subvol)); return lockrestart_do(trans, __snapshot_lookup_subvol(trans, snapshot, subvol));
} }
static int __subvol_lookup_root(struct btree_trans *trans, u32 subvol, static int __subvol_lookup(struct btree_trans *trans, u32 subvol,
u64 *inum) u32 *snapshot, u64 *inum)
{ {
struct btree_iter iter; struct btree_iter iter;
struct bkey_s_c k; struct bkey_s_c k;
...@@ -92,6 +120,7 @@ static int __subvol_lookup_root(struct btree_trans *trans, u32 subvol, ...@@ -92,6 +120,7 @@ static int __subvol_lookup_root(struct btree_trans *trans, u32 subvol,
goto err; goto err;
} }
*snapshot = le32_to_cpu(bkey_s_c_to_subvolume(k).v->snapshot);
*inum = le64_to_cpu(bkey_s_c_to_subvolume(k).v->inode); *inum = le64_to_cpu(bkey_s_c_to_subvolume(k).v->inode);
err: err:
bch2_trans_iter_exit(trans, &iter); bch2_trans_iter_exit(trans, &iter);
...@@ -99,9 +128,10 @@ static int __subvol_lookup_root(struct btree_trans *trans, u32 subvol, ...@@ -99,9 +128,10 @@ static int __subvol_lookup_root(struct btree_trans *trans, u32 subvol,
} }
static int subvol_lookup_root(struct btree_trans *trans, u32 subvol, u64 *inum) static int subvol_lookup(struct btree_trans *trans, u32 subvol,
u32 *snapshot, u64 *inum)
{ {
return lockrestart_do(trans, __subvol_lookup_root(trans, subvol, inum)); return lockrestart_do(trans, __subvol_lookup(trans, subvol, snapshot, inum));
} }
static int __lookup_inode(struct btree_trans *trans, u64 inode_nr, static int __lookup_inode(struct btree_trans *trans, u64 inode_nr,
...@@ -113,13 +143,12 @@ static int __lookup_inode(struct btree_trans *trans, u64 inode_nr, ...@@ -113,13 +143,12 @@ static int __lookup_inode(struct btree_trans *trans, u64 inode_nr,
int ret; int ret;
bch2_trans_iter_init(trans, &iter, BTREE_ID_inodes, bch2_trans_iter_init(trans, &iter, BTREE_ID_inodes,
POS(0, inode_nr), 0); SPOS(0, inode_nr, *snapshot), 0);
k = bch2_btree_iter_peek_slot(&iter); k = bch2_btree_iter_peek_slot(&iter);
ret = bkey_err(k); ret = bkey_err(k);
if (ret) if (ret)
goto err; goto err;
if (snapshot)
*snapshot = iter.pos.snapshot; *snapshot = iter.pos.snapshot;
ret = k.k->type == KEY_TYPE_inode ret = k.k->type == KEY_TYPE_inode
? bch2_inode_unpack(bkey_s_c_to_inode(k), inode) ? bch2_inode_unpack(bkey_s_c_to_inode(k), inode)
...@@ -136,6 +165,36 @@ static int lookup_inode(struct btree_trans *trans, u64 inode_nr, ...@@ -136,6 +165,36 @@ static int lookup_inode(struct btree_trans *trans, u64 inode_nr,
return lockrestart_do(trans, __lookup_inode(trans, inode_nr, inode, snapshot)); return lockrestart_do(trans, __lookup_inode(trans, inode_nr, inode, snapshot));
} }
/*
 * Look up @name in directory @dir and report what the dirent points at.
 *
 * @target:	set to the dirent's d_inum (converted from little endian)
 * @type:	set to the dirent's d_type
 *
 * Returns 0 on success, the error from bch2_hash_lookup() if the lookup
 * fails, or the error carried by the key returned from
 * bch2_btree_iter_peek_slot().  The outputs are only written on success.
 */
static int __lookup_dirent(struct btree_trans *trans,
			   struct bch_hash_info hash_info,
			   subvol_inum dir, struct qstr *name,
			   u64 *target, unsigned *type)
{
	struct btree_iter iter;
	struct bkey_s_c k;
	struct bkey_s_c_dirent d;
	int ret;

	ret = bch2_hash_lookup(trans, &iter, bch2_dirent_hash_desc,
			       &hash_info, dir, name, 0);
	if (ret)
		return ret;

	/*
	 * peek_slot() can hand back an error key; check it before treating
	 * the result as a dirent, as the other peek_slot() callers here do.
	 */
	k = bch2_btree_iter_peek_slot(&iter);
	ret = bkey_err(k);
	if (ret)
		goto err;

	d = bkey_s_c_to_dirent(k);
	*target = le64_to_cpu(d.v->d_inum);
	*type = d.v->d_type;
err:
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}
/*
 * Wrapper around __lookup_dirent(): passes all arguments through unchanged
 * and evaluates the call inside lockrestart_do(), returning that macro's
 * result.
 *
 * NOTE(review): lockrestart_do() is defined elsewhere; presumably it re-runs
 * the call when the transaction needs restarting - confirm.
 */
static int lookup_dirent(struct btree_trans *trans,
struct bch_hash_info hash_info,
subvol_inum dir, struct qstr *name,
u64 *target, unsigned *type)
{
return lockrestart_do(trans,
__lookup_dirent(trans, hash_info, dir, name, target, type));
}
static int __write_inode(struct btree_trans *trans, static int __write_inode(struct btree_trans *trans,
struct bch_inode_unpacked *inode, struct bch_inode_unpacked *inode,
u32 snapshot) u32 snapshot)
...@@ -166,6 +225,71 @@ static int write_inode(struct btree_trans *trans, ...@@ -166,6 +225,71 @@ static int write_inode(struct btree_trans *trans,
return ret; return ret;
} }
/*
 * fsck_inode_rm - delete inode @inum in @snapshot together with its keys
 *
 * Deletes the keys of @inum at @snapshot from offset 0 to U64_MAX in the
 * extents, dirents and xattrs btrees, then replaces the inode key with an
 * inode_generation key whose bi_generation is the old generation + 1.  If
 * the inode has bi_subvol set, that subvolume is deleted in the same
 * transaction.
 *
 * Returns 0 on success or a nonzero error: -EIO if there is no inode key at
 * that position, or the error from unpacking/deleting/committing.  -EINTR
 * from the inode update is retried internally.
 */
static int fsck_inode_rm(struct btree_trans *trans, u64 inum, u32 snapshot)
{
	struct btree_iter iter = { NULL };
	struct bkey_i_inode_generation delete;
	struct bch_inode_unpacked inode_u;
	struct bkey_s_c k;
	int ret;

	ret   = bch2_btree_delete_range_trans(trans, BTREE_ID_extents,
					      SPOS(inum, 0, snapshot),
					      SPOS(inum, U64_MAX, snapshot),
					      0, NULL) ?:
		bch2_btree_delete_range_trans(trans, BTREE_ID_dirents,
					      SPOS(inum, 0, snapshot),
					      SPOS(inum, U64_MAX, snapshot),
					      0, NULL) ?:
		bch2_btree_delete_range_trans(trans, BTREE_ID_xattrs,
					      SPOS(inum, 0, snapshot),
					      SPOS(inum, U64_MAX, snapshot),
					      0, NULL);
	if (ret)
		goto err;
retry:
	bch2_trans_begin(trans);

	bch2_trans_iter_init(trans, &iter, BTREE_ID_inodes,
			     SPOS(0, inum, snapshot), BTREE_ITER_INTENT);
	k = bch2_btree_iter_peek_slot(&iter);

	ret = bkey_err(k);
	if (ret)
		goto err;

	if (k.k->type != KEY_TYPE_inode) {
		bch2_fs_inconsistent(trans->c,
				     "inode %llu:%u not found when deleting",
				     inum, snapshot);
		ret = -EIO;
		goto err;
	}

	/*
	 * Don't act on inode_u if unpacking failed: bi_subvol and
	 * bi_generation would be garbage, and we'd risk deleting an
	 * unrelated subvolume below.
	 */
	ret = bch2_inode_unpack(bkey_s_c_to_inode(k), &inode_u);
	if (ret)
		goto err;

	/* Subvolume root? */
	if (inode_u.bi_subvol) {
		ret = bch2_subvolume_delete(trans, inode_u.bi_subvol, -1);
		if (ret)
			goto err;
	}

	bkey_inode_generation_init(&delete.k_i);
	delete.k.p = iter.pos;
	delete.v.bi_generation = cpu_to_le32(inode_u.bi_generation + 1);

	ret   = bch2_trans_update(trans, &iter, &delete.k_i, 0) ?:
		bch2_trans_commit(trans, NULL, NULL,
				  BTREE_INSERT_NOFAIL);
err:
	bch2_trans_iter_exit(trans, &iter);
	if (ret == -EINTR)
		goto retry;

	return ret;
}
static int __remove_dirent(struct btree_trans *trans, struct bpos pos) static int __remove_dirent(struct btree_trans *trans, struct bpos pos)
{ {
struct bch_fs *c = trans->c; struct bch_fs *c = trans->c;
...@@ -200,32 +324,49 @@ static int remove_dirent(struct btree_trans *trans, struct bpos pos) ...@@ -200,32 +324,49 @@ static int remove_dirent(struct btree_trans *trans, struct bpos pos)
} }
/* Get lost+found, create if it doesn't exist: */ /* Get lost+found, create if it doesn't exist: */
static int lookup_lostfound(struct btree_trans *trans, static int lookup_lostfound(struct btree_trans *trans, u32 subvol,
u32 subvol,
struct bch_inode_unpacked *lostfound) struct bch_inode_unpacked *lostfound)
{ {
struct bch_fs *c = trans->c; struct bch_fs *c = trans->c;
struct bch_inode_unpacked root; struct bch_inode_unpacked root;
struct bch_hash_info root_hash_info; struct bch_hash_info root_hash_info;
struct qstr lostfound_str = QSTR("lost+found"); struct qstr lostfound_str = QSTR("lost+found");
u64 inum; subvol_inum root_inum = { .subvol = subvol };
u64 inum = 0;
unsigned d_type = 0;
u32 snapshot; u32 snapshot;
int ret; int ret;
ret = subvol_lookup_root(trans, subvol, &inum); ret = subvol_lookup(trans, subvol, &snapshot, &root_inum.inum);
if (ret)
return ret;
ret = lookup_inode(trans, inum, &root, &snapshot); ret = lookup_inode(trans, root_inum.inum, &root, &snapshot);
if (ret && ret != -ENOENT) if (ret) {
bch_err(c, "error fetching subvol root: %i", ret);
return ret; return ret;
}
root_hash_info = bch2_hash_info_init(c, &root); root_hash_info = bch2_hash_info_init(c, &root);
inum = bch2_dirent_lookup(c, root.bi_inum, &root_hash_info,
&lostfound_str); ret = lookup_dirent(trans, root_hash_info, root_inum,
if (!inum) { &lostfound_str, &inum, &d_type);
if (ret == -ENOENT) {
bch_notice(c, "creating lost+found"); bch_notice(c, "creating lost+found");
goto create_lostfound; goto create_lostfound;
} }
if (ret) {
bch_err(c, "error looking up lost+found: %i", ret);
return ret;
}
if (d_type != DT_DIR) {
bch_err(c, "error looking up lost+found: not a directory");
return ret;
}
ret = lookup_inode(trans, inum, lostfound, &snapshot); ret = lookup_inode(trans, inum, lostfound, &snapshot);
if (ret && ret != -ENOENT) { if (ret && ret != -ENOENT) {
/* /*
...@@ -243,11 +384,9 @@ static int lookup_lostfound(struct btree_trans *trans, ...@@ -243,11 +384,9 @@ static int lookup_lostfound(struct btree_trans *trans,
ret = __bch2_trans_do(trans, NULL, NULL, ret = __bch2_trans_do(trans, NULL, NULL,
BTREE_INSERT_NOFAIL| BTREE_INSERT_NOFAIL|
BTREE_INSERT_LAZY_RW, BTREE_INSERT_LAZY_RW,
bch2_create_trans(trans, bch2_create_trans(trans, root_inum, &root,
BCACHEFS_ROOT_INO, &root, lostfound, &lostfound_str,
lostfound, 0, 0, S_IFDIR|0700, 0, NULL, NULL, 0));
&lostfound_str,
0, 0, S_IFDIR|0700, 0, NULL, NULL));
if (ret) if (ret)
bch_err(c, "error creating lost+found: %i", ret); bch_err(c, "error creating lost+found: %i", ret);
} }
...@@ -257,7 +396,7 @@ static int lookup_lostfound(struct btree_trans *trans, ...@@ -257,7 +396,7 @@ static int lookup_lostfound(struct btree_trans *trans,
static int reattach_inode(struct btree_trans *trans, static int reattach_inode(struct btree_trans *trans,
struct bch_inode_unpacked *inode, struct bch_inode_unpacked *inode,
u32 snapshot) u32 inode_snapshot)
{ {
struct bch_hash_info dir_hash; struct bch_hash_info dir_hash;
struct bch_inode_unpacked lostfound; struct bch_inode_unpacked lostfound;
...@@ -267,7 +406,7 @@ static int reattach_inode(struct btree_trans *trans, ...@@ -267,7 +406,7 @@ static int reattach_inode(struct btree_trans *trans,
u32 subvol; u32 subvol;
int ret; int ret;
ret = snapshot_lookup_subvol(trans, snapshot, &subvol); ret = snapshot_lookup_subvol(trans, inode_snapshot, &subvol);
if (ret) if (ret)
return ret; return ret;
...@@ -289,7 +428,12 @@ static int reattach_inode(struct btree_trans *trans, ...@@ -289,7 +428,12 @@ static int reattach_inode(struct btree_trans *trans,
name = (struct qstr) QSTR(name_buf); name = (struct qstr) QSTR(name_buf);
ret = __bch2_trans_do(trans, NULL, NULL, BTREE_INSERT_LAZY_RW, ret = __bch2_trans_do(trans, NULL, NULL, BTREE_INSERT_LAZY_RW,
bch2_dirent_create(trans, lostfound.bi_inum, &dir_hash, bch2_dirent_create(trans,
(subvol_inum) {
.subvol = subvol,
.inum = lostfound.bi_inum,
},
&dir_hash,
mode_to_type(inode->bi_mode), mode_to_type(inode->bi_mode),
&name, inode->bi_inum, &dir_offset, &name, inode->bi_inum, &dir_offset,
BCH_HASH_SET_MUST_CREATE)); BCH_HASH_SET_MUST_CREATE));
...@@ -302,7 +446,7 @@ static int reattach_inode(struct btree_trans *trans, ...@@ -302,7 +446,7 @@ static int reattach_inode(struct btree_trans *trans,
inode->bi_dir = lostfound.bi_inum; inode->bi_dir = lostfound.bi_inum;
inode->bi_dir_offset = dir_offset; inode->bi_dir_offset = dir_offset;
return write_inode(trans, inode, U32_MAX); return write_inode(trans, inode, inode_snapshot);
} }
static int remove_backpointer(struct btree_trans *trans, static int remove_backpointer(struct btree_trans *trans,
...@@ -329,45 +473,287 @@ static int remove_backpointer(struct btree_trans *trans, ...@@ -329,45 +473,287 @@ static int remove_backpointer(struct btree_trans *trans,
return ret; return ret;
} }
/*
 * List of snapshot IDs recorded for a single key position; maintained by
 * snapshots_seen_update().
 */
struct snapshots_seen {
/* position the entries in @d were recorded for */
struct bpos pos;
/* number of entries of @d currently in use */
size_t nr;
/* number of entries @d has room for */
size_t size;
/* krealloc()ed array of snapshot IDs, freed by snapshots_seen_exit() */
u32 *d;
};
/*
 * Free the snapshot ID table and return @s to its empty state.
 *
 * The counters are cleared along with the pointer: leaving a stale nonzero
 * nr/size next to d == NULL would let a later snapshots_seen_update() skip
 * the allocation and store through the NULL table.
 */
static void snapshots_seen_exit(struct snapshots_seen *s)
{
	kfree(s->d);
	s->d	= NULL;
	s->nr	= 0;
	s->size	= 0;
}
static void snapshots_seen_init(struct snapshots_seen *s)
{
memset(s, 0, sizeof(*s));
}
static int snapshots_seen_update(struct bch_fs *c, struct snapshots_seen *s, struct bpos pos)
{
pos.snapshot = snapshot_t(c, pos.snapshot)->equiv;
if (bkey_cmp(s->pos, pos))
s->nr = 0;
s->pos = pos;
if (s->nr == s->size) {
size_t new_size = max(s->size, 128UL) * 2;
u32 *d = krealloc(s->d, new_size * sizeof(s->d[0]), GFP_KERNEL);
if (!d) {
bch_err(c, "error reallocating snapshots_seen table (new size %zu)",
new_size);
return -ENOMEM;
}
s->size = new_size;
s->d = d;
}
/* Might get called multiple times due to lock restarts */
if (s->nr && s->d[s->nr - 1] == pos.snapshot)
return 0;
s->d[s->nr++] = pos.snapshot;
return 0;
}
/**
 * key_visible_in_snapshot - returns true if @id is a descendant of @ancestor,
 * and @ancestor hasn't been overwritten in @seen
 *
 * That is, returns whether key in @ancestor snapshot is visible in @id snapshot
 *
 * @c:		filesystem, used to look up snapshot ->equiv IDs and ancestry
 * @seen:	snapshot IDs recorded so far for the current key position
 * @id:		snapshot we are asking about; must not be greater than @ancestor
 * @ancestor:	snapshot the key lives in
 *
 * Both IDs are replaced by their ->equiv values before any comparison other
 * than the initial ordering assertion.
 */
static bool key_visible_in_snapshot(struct bch_fs *c, struct snapshots_seen *seen,
u32 id, u32 ancestor)
{
ssize_t i;
/* Ordering is asserted on the raw IDs, before they are mapped to ->equiv */
BUG_ON(id > ancestor);
id = snapshot_t(c, id)->equiv;
ancestor = snapshot_t(c, ancestor)->equiv;
/* @ancestor should be the snapshot most recently added to @seen */
BUG_ON(!seen->nr || seen->d[seen->nr - 1] != ancestor);
BUG_ON(seen->pos.snapshot != ancestor);
if (id == ancestor)
return true;
if (!bch2_snapshot_is_ancestor(c, id, ancestor))
return false;
/*
 * Walk the earlier entries of @seen from newest to oldest, stopping at the
 * first one below @id.  If any of them sits between @id and @ancestor in the
 * ancestry (the two bch2_snapshot_is_ancestor() tests below), report the key
 * as not visible.
 */
for (i = seen->nr - 2;
i >= 0 && seen->d[i] >= id;
--i)
if (bch2_snapshot_is_ancestor(c, id, seen->d[i]) &&
bch2_snapshot_is_ancestor(c, seen->d[i], ancestor))
return false;
return true;
}
/**
 * ref_visible - given a key with snapshot id @src that points to a key with
 * snapshot id @dst, test whether there is some snapshot in which @dst is
 * visible.
 *
 * This assumes we're visiting @src keys in natural key order.
 *
 * @c   - filesystem the snapshot IDs belong to
 * @s   - list of snapshot IDs already seen at @src
 * @src - snapshot ID of src key
 * @dst - snapshot ID of dst key
 *
 * When @dst is greater than @src the answer is the plain ancestry test
 * between the two; otherwise it is key_visible_in_snapshot() with @dst as
 * the id and @src as the ancestor.
 */
static int ref_visible(struct bch_fs *c, struct snapshots_seen *s,
		       u32 src, u32 dst)
{
	if (dst > src)
		return bch2_snapshot_is_ancestor(c, src, dst);

	return key_visible_in_snapshot(c, s, dst, src);
}
/*
 * Iterate @_i over the entries of inode walker @_w whose snapshot is
 * <= @_snapshot, running the loop body only for entries for which
 * key_visible_in_snapshot(@_c, @_s, entry snapshot, @_snapshot) is true.
 *
 * The walk stops at the first entry whose snapshot exceeds @_snapshot.
 * Every use of @_i is parenthesized so any pointer lvalue expression can be
 * passed as the cursor.  Note the expansion ends in an unbraced if: don't
 * follow the loop body with an else.
 */
#define for_each_visible_inode(_c, _s, _w, _snapshot, _i)				\
	for ((_i) = (_w)->d;								\
	     (_i) < (_w)->d + (_w)->nr && (_i)->snapshot <= (_snapshot);		\
	     (_i)++)									\
		if (key_visible_in_snapshot(_c, _s, (_i)->snapshot, _snapshot))
struct inode_walker { struct inode_walker {
bool first_this_inode; bool first_this_inode;
bool have_inode;
u64 cur_inum; u64 cur_inum;
u32 snapshot;
size_t nr;
size_t size;
struct inode_walker_entry {
struct bch_inode_unpacked inode; struct bch_inode_unpacked inode;
u32 snapshot;
u64 count;
} *d;
}; };
/*
 * Free the walker's entry array and return it to its empty state.
 *
 * nr and size are cleared together with the pointer: inode_walker_realloc()
 * only allocates when nr == size, so a stale nonzero size next to d == NULL
 * would let a later add store through the NULL array.
 */
static void inode_walker_exit(struct inode_walker *w)
{
	kfree(w->d);
	w->d	= NULL;
	w->nr	= 0;
	w->size	= 0;
}
static struct inode_walker inode_walker_init(void) static struct inode_walker inode_walker_init(void)
{ {
return (struct inode_walker) { return (struct inode_walker) { 0, };
.cur_inum = -1, }
.have_inode = false,
/*
 * Make sure the walker has room for one more entry.
 *
 * When the array is full it is grown to twice its size (at least 8 entries).
 * Returns 0 on success, -ENOMEM if krealloc() fails; on failure the walker
 * is left untouched.
 */
static int inode_walker_realloc(struct inode_walker *w)
{
	size_t new_size;
	void *d;

	/* Still a free slot: nothing to do */
	if (w->nr != w->size)
		return 0;

	new_size = max_t(size_t, 8UL, w->size * 2);
	d = krealloc(w->d, new_size * sizeof(w->d[0]),
		     GFP_KERNEL);
	if (!d)
		return -ENOMEM;

	w->size	= new_size;
	w->d	= d;
	return 0;
}
static int add_inode(struct bch_fs *c, struct inode_walker *w,
struct bkey_s_c_inode inode)
{
struct bch_inode_unpacked u;
int ret;
ret = inode_walker_realloc(w);
if (ret)
return ret;
BUG_ON(bch2_inode_unpack(inode, &u));
w->d[w->nr++] = (struct inode_walker_entry) {
.inode = u,
.snapshot = snapshot_t(c, inode.k->p.snapshot)->equiv,
}; };
return 0;
} }
static int __walk_inode(struct btree_trans *trans, static int __walk_inode(struct btree_trans *trans,
struct inode_walker *w, u64 inum) struct inode_walker *w, struct bpos pos)
{ {
if (inum != w->cur_inum) { struct bch_fs *c = trans->c;
int ret = __lookup_inode(trans, inum, &w->inode, &w->snapshot); struct btree_iter iter;
struct bkey_s_c k;
unsigned i, ancestor_pos;
int ret;
if (ret && ret != -ENOENT) pos.snapshot = snapshot_t(c, pos.snapshot)->equiv;
if (pos.inode == w->cur_inum) {
w->first_this_inode = false;
goto lookup_snapshot;
}
w->nr = 0;
for_each_btree_key(trans, iter, BTREE_ID_inodes, POS(0, pos.inode),
BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
if (k.k->p.offset != pos.inode)
break;
if (k.k->type == KEY_TYPE_inode)
add_inode(c, w, bkey_s_c_to_inode(k));
}
bch2_trans_iter_exit(trans, &iter);
if (ret)
return ret; return ret;
w->have_inode = !ret; w->cur_inum = pos.inode;
w->cur_inum = inum;
w->first_this_inode = true; w->first_this_inode = true;
} else { lookup_snapshot:
w->first_this_inode = false; for (i = 0; i < w->nr; i++)
if (bch2_snapshot_is_ancestor(c, pos.snapshot, w->d[i].snapshot))
goto found;
return INT_MAX;
found:
BUG_ON(pos.snapshot > w->d[i].snapshot);
if (pos.snapshot != w->d[i].snapshot) {
ancestor_pos = i;
while (i && w->d[i - 1].snapshot > pos.snapshot)
--i;
ret = inode_walker_realloc(w);
if (ret)
return ret;
array_insert_item(w->d, w->nr, i, w->d[ancestor_pos]);
w->d[i].snapshot = pos.snapshot;
w->d[i].count = 0;
} }
return 0; return i;
} }
static int walk_inode(struct btree_trans *trans, static int walk_inode(struct btree_trans *trans,
struct inode_walker *w, u64 inum) struct inode_walker *w, struct bpos pos)
{ {
return lockrestart_do(trans, __walk_inode(trans, w, inum)); return lockrestart_do(trans, __walk_inode(trans, w, pos));
}
/*
 * Rebuild walker @w with the versions of inode @inum that ref_visible()
 * reports as visible from the snapshot recorded in @s->pos.
 *
 * Inode keys for @inum are visited across all snapshots; the walk stops
 * after adding a visible key whose snapshot is >= @s->pos.snapshot, or when
 * the keys for @inum run out.
 *
 * Returns 0 on success, or a nonzero error from the btree iteration or from
 * add_inode() (e.g. -ENOMEM when the walker cannot be grown).
 */
static int __get_visible_inodes(struct btree_trans *trans,
				struct inode_walker *w,
				struct snapshots_seen *s,
				u64 inum)
{
	struct bch_fs *c = trans->c;
	struct btree_iter iter;
	struct bkey_s_c k;
	int ret;

	w->nr = 0;

	for_each_btree_key(trans, iter, BTREE_ID_inodes, POS(0, inum),
			   BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
		if (k.k->p.offset != inum)
			break;

		if (k.k->type != KEY_TYPE_inode)
			continue;

		if (ref_visible(c, s, s->pos.snapshot, k.k->p.snapshot)) {
			/* Don't silently lose an inode version on allocation failure */
			ret = add_inode(c, w, bkey_s_c_to_inode(k));
			if (ret)
				break;

			if (k.k->p.snapshot >= s->pos.snapshot)
				break;
		}
	}
	bch2_trans_iter_exit(trans, &iter);

	return ret;
}
/*
 * Check that the snapshot ID of key @k has a nonzero ->equiv entry in the
 * snapshot table; if not, report "key in missing snapshot" and, when the
 * fsck_err_on() condition evaluates true, delete the key at @iter.
 *
 * Returns 0 if nothing was done.  After a deletion it returns the error from
 * the delete, or -EINTR when the delete succeeded.
 *
 * NOTE(review): fsck_err_on() is a macro defined elsewhere; the fsck_err
 * label below is presumably its bail-out target and it may assign ret -
 * confirm against the macro definition.
 */
static int check_key_has_snapshot(struct btree_trans *trans,
struct btree_iter *iter,
struct bkey_s_c k)
{
struct bch_fs *c = trans->c;
char buf[200];
int ret = 0;
if (fsck_err_on(!snapshot_t(c, k.k->p.snapshot)->equiv, c,
"key in missing snapshot: %s",
(bch2_bkey_val_to_text(&PBUF(buf), c, k), buf))) {
ret = __bch2_trans_do(trans, NULL, NULL, BTREE_INSERT_LAZY_RW,
bch2_btree_delete_at(trans, iter,
BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE));
/* A successful delete is still reported as -EINTR, never as 0 */
return ret ?: -EINTR;
}
fsck_err:
return ret;
} }
static int hash_redo_key(struct btree_trans *trans, static int hash_redo_key(struct btree_trans *trans,
...@@ -375,6 +761,9 @@ static int hash_redo_key(struct btree_trans *trans, ...@@ -375,6 +761,9 @@ static int hash_redo_key(struct btree_trans *trans,
struct bch_hash_info *hash_info, struct bch_hash_info *hash_info,
struct btree_iter *k_iter, struct bkey_s_c k) struct btree_iter *k_iter, struct bkey_s_c k)
{ {
bch_err(trans->c, "hash_redo_key() not implemented yet");
return -EINVAL;
#if 0
struct bkey_i *delete; struct bkey_i *delete;
struct bkey_i *tmp; struct bkey_i *tmp;
...@@ -393,6 +782,7 @@ static int hash_redo_key(struct btree_trans *trans, ...@@ -393,6 +782,7 @@ static int hash_redo_key(struct btree_trans *trans,
return bch2_btree_iter_traverse(k_iter) ?: return bch2_btree_iter_traverse(k_iter) ?:
bch2_trans_update(trans, k_iter, delete, 0) ?: bch2_trans_update(trans, k_iter, delete, 0) ?:
bch2_hash_set(trans, desc, hash_info, k_iter->pos.inode, tmp, 0); bch2_hash_set(trans, desc, hash_info, k_iter->pos.inode, tmp, 0);
#endif
} }
static int fsck_hash_delete_at(struct btree_trans *trans, static int fsck_hash_delete_at(struct btree_trans *trans,
...@@ -484,30 +874,29 @@ static int hash_check_key(struct btree_trans *trans, ...@@ -484,30 +874,29 @@ static int hash_check_key(struct btree_trans *trans,
static int check_inode(struct btree_trans *trans, static int check_inode(struct btree_trans *trans,
struct btree_iter *iter, struct btree_iter *iter,
struct bkey_s_c_inode inode) struct bch_inode_unpacked *prev,
struct bch_inode_unpacked u)
{ {
struct bch_fs *c = trans->c; struct bch_fs *c = trans->c;
struct bch_inode_unpacked u;
bool do_update = false; bool do_update = false;
int ret = 0; int ret = 0;
ret = bch2_inode_unpack(inode, &u); if (fsck_err_on(prev &&
(prev->bi_hash_seed != u.bi_hash_seed ||
if (bch2_fs_inconsistent_on(ret, c, mode_to_type(prev->bi_mode) != mode_to_type(u.bi_mode)), c,
"error unpacking inode %llu in fsck", "inodes in different snapshots don't match")) {
inode.k->p.inode)) bch_err(c, "repair not implemented yet");
return ret; return -EINVAL;
}
if (u.bi_flags & BCH_INODE_UNLINKED && if (u.bi_flags & BCH_INODE_UNLINKED &&
(!c->sb.clean || (!c->sb.clean ||
fsck_err(c, "filesystem marked clean, but inode %llu unlinked", fsck_err(c, "filesystem marked clean, but inode %llu unlinked",
u.bi_inum))) { u.bi_inum))) {
bch_verbose(c, "deleting inode %llu", u.bi_inum);
bch2_trans_unlock(trans); bch2_trans_unlock(trans);
bch2_fs_lazy_rw(c); bch2_fs_lazy_rw(c);
ret = bch2_inode_rm(c, u.bi_inum, false); ret = fsck_inode_rm(trans, u.bi_inum, iter->pos.snapshot);
if (ret) if (ret)
bch_err(c, "error in fsck: error %i while deleting inode", ret); bch_err(c, "error in fsck: error %i while deleting inode", ret);
return ret; return ret;
...@@ -527,9 +916,10 @@ static int check_inode(struct btree_trans *trans, ...@@ -527,9 +916,10 @@ static int check_inode(struct btree_trans *trans,
* just switch units to bytes and that issue goes away * just switch units to bytes and that issue goes away
*/ */
ret = bch2_btree_delete_range_trans(trans, BTREE_ID_extents, ret = bch2_btree_delete_range_trans(trans, BTREE_ID_extents,
POS(u.bi_inum, round_up(u.bi_size, block_bytes(c)) >> 9), SPOS(u.bi_inum, round_up(u.bi_size, block_bytes(c)) >> 9,
iter->pos.snapshot),
POS(u.bi_inum, U64_MAX), POS(u.bi_inum, U64_MAX),
NULL); 0, NULL);
if (ret) { if (ret) {
bch_err(c, "error in fsck: error %i truncating inode", ret); bch_err(c, "error in fsck: error %i truncating inode", ret);
return ret; return ret;
...@@ -554,7 +944,7 @@ static int check_inode(struct btree_trans *trans, ...@@ -554,7 +944,7 @@ static int check_inode(struct btree_trans *trans,
bch_verbose(c, "recounting sectors for inode %llu", bch_verbose(c, "recounting sectors for inode %llu",
u.bi_inum); u.bi_inum);
sectors = bch2_count_inode_sectors(trans, u.bi_inum); sectors = bch2_count_inode_sectors(trans, u.bi_inum, iter->pos.snapshot);
if (sectors < 0) { if (sectors < 0) {
bch_err(c, "error in fsck: error %i recounting inode sectors", bch_err(c, "error in fsck: error %i recounting inode sectors",
(int) sectors); (int) sectors);
...@@ -574,11 +964,7 @@ static int check_inode(struct btree_trans *trans, ...@@ -574,11 +964,7 @@ static int check_inode(struct btree_trans *trans,
} }
if (do_update) { if (do_update) {
ret = __bch2_trans_do(trans, NULL, NULL, ret = write_inode(trans, &u, iter->pos.snapshot);
BTREE_INSERT_NOFAIL|
BTREE_INSERT_LAZY_RW,
bch2_btree_iter_traverse(iter) ?:
bch2_inode_write(trans, iter, &u));
if (ret) if (ret)
bch_err(c, "error in fsck: error %i " bch_err(c, "error in fsck: error %i "
"updating inode", ret); "updating inode", ret);
...@@ -594,26 +980,49 @@ static int check_inodes(struct bch_fs *c, bool full) ...@@ -594,26 +980,49 @@ static int check_inodes(struct bch_fs *c, bool full)
struct btree_iter iter; struct btree_iter iter;
struct bkey_s_c k; struct bkey_s_c k;
struct bkey_s_c_inode inode; struct bkey_s_c_inode inode;
struct bch_inode_unpacked prev, u;
int ret; int ret;
memset(&prev, 0, sizeof(prev));
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
for_each_btree_key(&trans, iter, BTREE_ID_inodes, POS_MIN, for_each_btree_key(&trans, iter, BTREE_ID_inodes, POS_MIN,
BTREE_ITER_INTENT| BTREE_ITER_INTENT|
BTREE_ITER_PREFETCH, k, ret) { BTREE_ITER_PREFETCH|
BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
ret = check_key_has_snapshot(&trans, &iter, k);
if (ret)
break;
/*
* if snapshot id isn't a leaf node, skip it - deletion in
* particular is not atomic, so on the internal snapshot nodes
* we can see inodes marked for deletion after a clean shutdown
*/
if (bch2_snapshot_internal_node(c, k.k->p.snapshot))
continue;
if (k.k->type != KEY_TYPE_inode) if (k.k->type != KEY_TYPE_inode)
continue; continue;
inode = bkey_s_c_to_inode(k); inode = bkey_s_c_to_inode(k);
if (full || if (!full &&
(inode.v->bi_flags & (BCH_INODE_I_SIZE_DIRTY| !(inode.v->bi_flags & (BCH_INODE_I_SIZE_DIRTY|
BCH_INODE_I_SECTORS_DIRTY| BCH_INODE_I_SECTORS_DIRTY|
BCH_INODE_UNLINKED))) { BCH_INODE_UNLINKED)))
ret = check_inode(&trans, &iter, inode); continue;
BUG_ON(bch2_inode_unpack(inode, &u));
ret = check_inode(&trans, &iter,
full && prev.bi_inum == u.bi_inum
? &prev : NULL, u);
if (ret) if (ret)
break; break;
}
prev = u;
} }
bch2_trans_iter_exit(&trans, &iter); bch2_trans_iter_exit(&trans, &iter);
...@@ -622,6 +1031,29 @@ static int check_inodes(struct bch_fs *c, bool full) ...@@ -622,6 +1031,29 @@ static int check_inodes(struct bch_fs *c, bool full)
return bch2_trans_exit(&trans) ?: ret; return bch2_trans_exit(&trans) ?: ret;
} }
/*
 * Walk the subvolumes btree.  The loop body is currently empty, so this only
 * iterates over BTREE_ID_subvolumes and reports any error hit on the way.
 *
 * Returns the error from bch2_trans_exit() if there is one, otherwise the
 * status of the iteration.
 */
noinline_for_stack
static int check_subvols(struct bch_fs *c)
{
	struct btree_trans trans;
	struct btree_iter iter;
	struct bkey_s_c k;
	int ret;

	bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);

	for_each_btree_key(&trans, iter, BTREE_ID_subvolumes, POS_MIN,
			   0, k, ret) {
		/* no per-subvolume checks yet */
	}
	bch2_trans_iter_exit(&trans, &iter);

	/* Don't drop an error from bch2_trans_exit(); check_inodes() returns it too */
	return bch2_trans_exit(&trans) ?: ret;
}
/*
* Checking for overlapping extents needs to be reimplemented
*/
#if 0
static int fix_overlapping_extent(struct btree_trans *trans, static int fix_overlapping_extent(struct btree_trans *trans,
struct bkey_s_c k, struct bpos cut_at) struct bkey_s_c k, struct bpos cut_at)
{ {
...@@ -657,16 +1089,18 @@ static int fix_overlapping_extent(struct btree_trans *trans, ...@@ -657,16 +1089,18 @@ static int fix_overlapping_extent(struct btree_trans *trans,
bch2_trans_iter_exit(trans, &iter); bch2_trans_iter_exit(trans, &iter);
return ret; return ret;
} }
#endif
static int inode_backpointer_exists(struct btree_trans *trans, static int inode_backpointer_exists(struct btree_trans *trans,
struct bch_inode_unpacked *inode) struct bch_inode_unpacked *inode,
u32 snapshot)
{ {
struct btree_iter iter; struct btree_iter iter;
struct bkey_s_c k; struct bkey_s_c k;
int ret; int ret;
bch2_trans_iter_init(trans, &iter, BTREE_ID_dirents, bch2_trans_iter_init(trans, &iter, BTREE_ID_dirents,
POS(inode->bi_dir, inode->bi_dir_offset), 0); SPOS(inode->bi_dir, inode->bi_dir_offset, snapshot), 0);
k = bch2_btree_iter_peek_slot(&iter); k = bch2_btree_iter_peek_slot(&iter);
ret = bkey_err(k); ret = bkey_err(k);
if (ret) if (ret)
...@@ -674,19 +1108,157 @@ static int inode_backpointer_exists(struct btree_trans *trans, ...@@ -674,19 +1108,157 @@ static int inode_backpointer_exists(struct btree_trans *trans,
if (k.k->type != KEY_TYPE_dirent) if (k.k->type != KEY_TYPE_dirent)
goto out; goto out;
ret = le64_to_cpu(bkey_s_c_to_dirent(k).v->d_inum) == inode->bi_inum; ret = le64_to_cpu(bkey_s_c_to_dirent(k).v->d_inum) == inode->bi_inum;
out: out:
bch2_trans_iter_exit(trans, &iter); bch2_trans_iter_exit(trans, &iter);
return ret;
}
static bool inode_backpointer_matches(struct bkey_s_c_dirent d,
struct bch_inode_unpacked *inode)
{
return d.k->p.inode == inode->bi_dir &&
d.k->p.offset == inode->bi_dir_offset;
}
static int check_i_sectors(struct btree_trans *trans, struct inode_walker *w)
{
struct bch_fs *c = trans->c;
struct inode_walker_entry *i;
int ret = 0, ret2 = 0;
s64 count2;
for (i = w->d; i < w->d + w->nr; i++) {
if (i->inode.bi_sectors == i->count)
continue;
count2 = lockrestart_do(trans,
bch2_count_inode_sectors(trans, w->cur_inum, i->snapshot));
if (i->count != count2) {
bch_err(c, "fsck counted i_sectors wrong: got %llu should be %llu",
i->count, count2);
i->count = count2;
if (i->inode.bi_sectors == i->count)
continue;
}
if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_I_SECTORS_DIRTY), c,
"inode %llu:%u has incorrect i_sectors: got %llu, should be %llu",
w->cur_inum, i->snapshot,
i->inode.bi_sectors, i->count) == FSCK_ERR_IGNORE)
continue;
i->inode.bi_sectors = i->count;
ret = write_inode(trans, &i->inode, i->snapshot);
if (ret)
break;
ret2 = -EINTR;
}
fsck_err:
return ret ?: ret2;
}
/*
 * Check the extent key @iter currently points at.
 *
 * @inode:	walker holding the versions of the inode the extent belongs to,
 *		with a running sector count per version
 * @s:		snapshot IDs seen so far at the current key position
 *
 * Returns 0 when there is no key or nothing needed fixing; otherwise an
 * error code or the result of whichever repair was attempted.
 *
 * NOTE(review): fsck_err_on() is a macro defined elsewhere; the fsck_err
 * label at the end is presumably its bail-out target - confirm.
 */
static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
struct inode_walker *inode,
struct snapshots_seen *s)
{
struct bch_fs *c = trans->c;
struct bkey_s_c k;
struct inode_walker_entry *i;
char buf[200];
int ret = 0;
k = bch2_btree_iter_peek(iter);
/* No key to check */
if (!k.k)
return 0;
ret = bkey_err(k);
if (ret)
return ret;
ret = check_key_has_snapshot(trans, iter, k);
if (ret)
return ret;
/* Record this key's snapshot before any visibility queries below */
ret = snapshots_seen_update(c, s, k.k->p);
if (ret)
return ret;
if (k.k->type == KEY_TYPE_whiteout)
return 0;
/*
 * The key belongs to a different inode than the walker currently holds:
 * settle the sector counts of the walker's entries first.
 */
if (inode->cur_inum != k.k->p.inode) {
ret = check_i_sectors(trans, inode);
if (ret)
return ret;
}
#if 0
if (bkey_cmp(prev.k->k.p, bkey_start_pos(k.k)) > 0) {
char buf1[200];
char buf2[200];
bch2_bkey_val_to_text(&PBUF(buf1), c, bkey_i_to_s_c(prev.k));
bch2_bkey_val_to_text(&PBUF(buf2), c, k);
if (fsck_err(c, "overlapping extents:\n%s\n%s", buf1, buf2))
return fix_overlapping_extent(trans, k, prev.k->k.p) ?: -EINTR;
}
#endif
/*
 * __walk_inode() result: negative is an error, INT_MAX means no inode was
 * found for this key, anything else is an index into inode->d.
 */
ret = __walk_inode(trans, inode, k.k->p);
if (ret < 0)
return ret;
if (fsck_err_on(ret == INT_MAX, c,
"extent in missing inode:\n %s",
(bch2_bkey_val_to_text(&PBUF(buf), c, k), buf)))
return __bch2_trans_do(trans, NULL, NULL, BTREE_INSERT_LAZY_RW,
bch2_btree_delete_at(trans, iter,
BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE));
/* Inode missing but the extent was not deleted: nothing more to check */
if (ret == INT_MAX)
return 0;
i = inode->d + ret;
ret = 0;
/* Extents are only accepted in regular files and symlinks */
if (fsck_err_on(!S_ISREG(i->inode.bi_mode) &&
!S_ISLNK(i->inode.bi_mode), c,
"extent in non regular inode mode %o:\n %s",
i->inode.bi_mode,
(bch2_bkey_val_to_text(&PBUF(buf), c, k), buf)))
return __bch2_trans_do(trans, NULL, NULL, BTREE_INSERT_LAZY_RW,
bch2_btree_delete_at(trans, iter,
BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE));
/* The i_size check is skipped for keys in internal snapshot nodes */
if (!bch2_snapshot_internal_node(c, k.k->p.snapshot)) {
for_each_visible_inode(c, s, inode, k.k->p.snapshot, i) {
if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_I_SIZE_DIRTY) &&
k.k->type != KEY_TYPE_reservation &&
k.k->p.offset > round_up(i->inode.bi_size, block_bytes(c)) >> 9, c,
"extent type %u offset %llu past end of inode %llu, i_size %llu",
k.k->type, k.k->p.offset, k.k->p.inode, i->inode.bi_size)) {
bch2_fs_lazy_rw(c);
/* Delete from the rounded-up i_size (in 512-byte sectors) onwards */
return bch2_btree_delete_range_trans(trans, BTREE_ID_extents,
SPOS(k.k->p.inode, round_up(i->inode.bi_size, block_bytes(c)) >> 9,
k.k->p.snapshot),
POS(k.k->p.inode, U64_MAX),
0, NULL) ?: -EINTR;
}
}
}
/* Add this extent's size to every inode version it is visible in */
if (bkey_extent_is_allocation(k.k))
for_each_visible_inode(c, s, inode, k.k->p.snapshot, i)
i->count += k.k->size;
#if 0
bch2_bkey_buf_reassemble(&prev, c, k);
#endif
fsck_err:
return ret; return ret;
} }
/*
 * Report whether the backpointer stored in @inode (bi_dir, bi_dir_offset)
 * refers to the position of dirent @d.
 */
static bool inode_backpointer_matches(struct bkey_s_c_dirent d,
				      struct bch_inode_unpacked *inode)
{
	/* The directory inode number must agree first... */
	if (d.k->p.inode != inode->bi_dir)
		return false;

	/* ...and then the dirent's offset within that directory. */
	return d.k->p.offset == inode->bi_dir_offset;
}
/* /*
* Walk extents: verify that extents have a corresponding S_ISREG inode, and * Walk extents: verify that extents have a corresponding S_ISREG inode, and
* that i_size and i_sectors are consistent * that i_size and i_sectors are consistent
...@@ -695,15 +1267,17 @@ noinline_for_stack ...@@ -695,15 +1267,17 @@ noinline_for_stack
static int check_extents(struct bch_fs *c) static int check_extents(struct bch_fs *c)
{ {
struct inode_walker w = inode_walker_init(); struct inode_walker w = inode_walker_init();
struct snapshots_seen s;
struct btree_trans trans; struct btree_trans trans;
struct btree_iter iter; struct btree_iter iter;
struct bkey_s_c k;
struct bkey_buf prev;
u64 i_sectors = 0;
int ret = 0; int ret = 0;
#if 0
struct bkey_buf prev;
bch2_bkey_buf_init(&prev); bch2_bkey_buf_init(&prev);
prev.k->k = KEY(0, 0, 0); prev.k->k = KEY(0, 0, 0);
#endif
snapshots_seen_init(&s);
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
bch_verbose(c, "checking extents"); bch_verbose(c, "checking extents");
...@@ -711,96 +1285,172 @@ static int check_extents(struct bch_fs *c) ...@@ -711,96 +1285,172 @@ static int check_extents(struct bch_fs *c)
bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents, bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents,
POS(BCACHEFS_ROOT_INO, 0), POS(BCACHEFS_ROOT_INO, 0),
BTREE_ITER_INTENT| BTREE_ITER_INTENT|
BTREE_ITER_PREFETCH); BTREE_ITER_PREFETCH|
retry: BTREE_ITER_ALL_SNAPSHOTS);
while ((k = bch2_btree_iter_peek(&iter)).k &&
!(ret = bkey_err(k))) { do {
if (w.have_inode && ret = lockrestart_do(&trans,
w.cur_inum != k.k->p.inode && check_extent(&trans, &iter, &w, &s));
!(w.inode.bi_flags & BCH_INODE_I_SECTORS_DIRTY) &&
fsck_err_on(w.inode.bi_sectors != i_sectors, c,
"inode %llu has incorrect i_sectors: got %llu, should be %llu",
w.inode.bi_inum,
w.inode.bi_sectors, i_sectors)) {
w.inode.bi_sectors = i_sectors;
ret = write_inode(&trans, &w.inode, w.snapshot);
if (ret) if (ret)
break; break;
} } while (bch2_btree_iter_advance(&iter));
bch2_trans_iter_exit(&trans, &iter);
#if 0
bch2_bkey_buf_exit(&prev, c);
#endif
inode_walker_exit(&w);
bch2_trans_exit(&trans);
snapshots_seen_exit(&s);
if (bkey_cmp(prev.k->k.p, bkey_start_pos(k.k)) > 0) { return ret;
char buf1[200]; }
char buf2[200];
bch2_bkey_val_to_text(&PBUF(buf1), c, bkey_i_to_s_c(prev.k)); static int check_subdir_count(struct btree_trans *trans, struct inode_walker *w)
bch2_bkey_val_to_text(&PBUF(buf2), c, k); {
struct bch_fs *c = trans->c;
struct inode_walker_entry *i;
int ret = 0, ret2 = 0;
s64 count2;
if (fsck_err(c, "overlapping extents:\n%s\n%s", buf1, buf2)) for (i = w->d; i < w->d + w->nr; i++) {
return fix_overlapping_extent(&trans, k, prev.k->k.p) ?: -EINTR; if (i->inode.bi_nlink == i->count)
continue;
count2 = lockrestart_do(trans,
bch2_count_subdirs(trans, w->cur_inum, i->snapshot));
if (i->count != count2) {
bch_err(c, "fsck counted subdirectories wrong: got %llu should be %llu",
i->count, count2);
i->count = count2;
if (i->inode.bi_nlink == i->count)
continue;
} }
ret = walk_inode(&trans, &w, k.k->p.inode); if (fsck_err_on(i->inode.bi_nlink != i->count, c,
"directory %llu:%u with wrong i_nlink: got %u, should be %llu",
w->cur_inum, i->snapshot, i->inode.bi_nlink, i->count)) {
i->inode.bi_nlink = i->count;
ret = write_inode(trans, &i->inode, i->snapshot);
if (ret) if (ret)
break; break;
ret2 = -EINTR;
}
}
fsck_err:
return ret ?: ret2;
}
if (w.first_this_inode) static int check_dirent_target(struct btree_trans *trans,
i_sectors = 0; struct btree_iter *iter,
struct bkey_s_c_dirent d,
if (fsck_err_on(!w.have_inode, c, struct bch_inode_unpacked *target,
"extent type %u for missing inode %llu", u32 target_snapshot)
k.k->type, k.k->p.inode) || {
fsck_err_on(w.have_inode && struct bch_fs *c = trans->c;
!S_ISREG(w.inode.bi_mode) && !S_ISLNK(w.inode.bi_mode), c, bool backpointer_exists = true;
"extent type %u for non regular file, inode %llu mode %o", char buf[200];
k.k->type, k.k->p.inode, w.inode.bi_mode)) { int ret = 0;
bch2_fs_lazy_rw(c);
return bch2_btree_delete_range_trans(&trans, BTREE_ID_extents, if (!target->bi_dir &&
POS(k.k->p.inode, 0), !target->bi_dir_offset) {
POS(k.k->p.inode, U64_MAX), target->bi_dir = d.k->p.inode;
NULL) ?: -EINTR; target->bi_dir_offset = d.k->p.offset;
ret = write_inode(trans, target, target_snapshot);
if (ret)
goto err;
} }
if (fsck_err_on(w.have_inode && if (!inode_backpointer_matches(d, target)) {
!(w.inode.bi_flags & BCH_INODE_I_SIZE_DIRTY) && ret = inode_backpointer_exists(trans, target, d.k->p.snapshot);
k.k->type != KEY_TYPE_reservation && if (ret < 0)
k.k->p.offset > round_up(w.inode.bi_size, block_bytes(c)) >> 9, c, goto err;
"extent type %u offset %llu past end of inode %llu, i_size %llu",
k.k->type, k.k->p.offset, k.k->p.inode, w.inode.bi_size)) { backpointer_exists = ret;
bch2_fs_lazy_rw(c); ret = 0;
return bch2_btree_delete_range_trans(&trans, BTREE_ID_extents,
POS(k.k->p.inode, round_up(w.inode.bi_size, block_bytes(c)) >> 9), if (fsck_err_on(S_ISDIR(target->bi_mode) &&
POS(k.k->p.inode, U64_MAX), backpointer_exists, c,
NULL) ?: -EINTR; "directory %llu with multiple links",
target->bi_inum)) {
ret = remove_dirent(trans, d.k->p);
if (ret)
goto err;
return 0;
} }
if (bkey_extent_is_allocation(k.k)) if (fsck_err_on(backpointer_exists &&
i_sectors += k.k->size; !target->bi_nlink, c,
bch2_bkey_buf_reassemble(&prev, c, k); "inode %llu has multiple links but i_nlink 0",
target->bi_inum)) {
target->bi_nlink++;
target->bi_flags &= ~BCH_INODE_UNLINKED;
bch2_btree_iter_advance(&iter); ret = write_inode(trans, target, target_snapshot);
if (ret)
goto err;
}
if (fsck_err_on(!backpointer_exists, c,
"inode %llu has wrong backpointer:\n"
"got %llu:%llu\n"
"should be %llu:%llu",
target->bi_inum,
target->bi_dir,
target->bi_dir_offset,
d.k->p.inode,
d.k->p.offset)) {
target->bi_dir = d.k->p.inode;
target->bi_dir_offset = d.k->p.offset;
ret = write_inode(trans, target, target_snapshot);
if (ret)
goto err;
}
}
if (fsck_err_on(vfs_d_type(d.v->d_type) != mode_to_type(target->bi_mode), c,
"incorrect d_type: should be %u:\n%s",
mode_to_type(target->bi_mode),
(bch2_bkey_val_to_text(&PBUF(buf), c, d.s_c), buf))) {
struct bkey_i_dirent *n;
n = kmalloc(bkey_bytes(d.k), GFP_KERNEL);
if (!n) {
ret = -ENOMEM;
goto err;
}
bkey_reassemble(&n->k_i, d.s_c);
n->v.d_type = mode_to_type(target->bi_mode);
ret = __bch2_trans_do(trans, NULL, NULL,
BTREE_INSERT_NOFAIL|
BTREE_INSERT_LAZY_RW,
bch2_trans_update(trans, iter, &n->k_i, 0));
kfree(n);
if (ret)
goto err;
} }
err:
fsck_err: fsck_err:
if (ret == -EINTR) return ret;
goto retry;
bch2_trans_iter_exit(&trans, &iter);
bch2_bkey_buf_exit(&prev, c);
return bch2_trans_exit(&trans) ?: ret;
} }
static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
struct bch_hash_info *hash_info, struct bch_hash_info *hash_info,
struct inode_walker *w, unsigned *nr_subdirs) struct inode_walker *dir,
struct inode_walker *target,
struct snapshots_seen *s)
{ {
struct bch_fs *c = trans->c; struct bch_fs *c = trans->c;
struct bkey_s_c k; struct bkey_s_c k;
struct bkey_s_c_dirent d; struct bkey_s_c_dirent d;
struct bch_inode_unpacked target; struct inode_walker_entry *i;
u32 target_snapshot; u32 target_snapshot;
u32 target_subvol; u32 target_subvol;
bool have_target; u64 target_inum;
bool backpointer_exists = true;
u64 d_inum;
char buf[200]; char buf[200];
int ret; int ret;
...@@ -812,38 +1462,49 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, ...@@ -812,38 +1462,49 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
if (ret) if (ret)
return ret; return ret;
if (w->have_inode && ret = check_key_has_snapshot(trans, iter, k);
w->cur_inum != k.k->p.inode && if (ret)
fsck_err_on(w->inode.bi_nlink != *nr_subdirs, c, return ret;
"directory %llu with wrong i_nlink: got %u, should be %u",
w->inode.bi_inum, w->inode.bi_nlink, *nr_subdirs)) {
w->inode.bi_nlink = *nr_subdirs;
ret = write_inode(trans, &w->inode, w->snapshot);
return ret ?: -EINTR;
}
ret = __walk_inode(trans, w, k.k->p.inode); ret = snapshots_seen_update(c, s, k.k->p);
if (ret) if (ret)
return ret; return ret;
if (w->first_this_inode) if (k.k->type == KEY_TYPE_whiteout)
*nr_subdirs = 0; return 0;
if (dir->cur_inum != k.k->p.inode) {
ret = check_subdir_count(trans, dir);
if (ret)
return ret;
}
ret = __walk_inode(trans, dir, k.k->p);
if (ret < 0)
return ret;
if (fsck_err_on(!w->have_inode, c, if (fsck_err_on(ret == INT_MAX, c,
"dirent in nonexisting directory:\n%s", "dirent in nonexisting directory:\n%s",
(bch2_bkey_val_to_text(&PBUF(buf), c, k), buf)) || (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf)))
fsck_err_on(!S_ISDIR(w->inode.bi_mode), c, return __bch2_trans_do(trans, NULL, NULL, BTREE_INSERT_LAZY_RW,
bch2_btree_delete_at(trans, iter,
BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE));
if (ret == INT_MAX)
return 0;
i = dir->d + ret;
ret = 0;
if (fsck_err_on(!S_ISDIR(i->inode.bi_mode), c,
"dirent in non directory inode type %u:\n%s", "dirent in non directory inode type %u:\n%s",
mode_to_type(w->inode.bi_mode), mode_to_type(i->inode.bi_mode),
(bch2_bkey_val_to_text(&PBUF(buf), c, k), buf))) (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf)))
return __bch2_trans_do(trans, NULL, NULL, 0, return __bch2_trans_do(trans, NULL, NULL, 0,
bch2_btree_delete_at(trans, iter, 0)); bch2_btree_delete_at(trans, iter, 0));
if (!w->have_inode) if (dir->first_this_inode)
return 0; *hash_info = bch2_hash_info_init(c, &dir->d[0].inode);
if (w->first_this_inode)
*hash_info = bch2_hash_info_init(c, &w->inode);
ret = hash_check_key(trans, bch2_dirent_hash_desc, ret = hash_check_key(trans, bch2_dirent_hash_desc,
hash_info, iter, k); hash_info, iter, k);
...@@ -856,128 +1517,76 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, ...@@ -856,128 +1517,76 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
return 0; return 0;
d = bkey_s_c_to_dirent(k); d = bkey_s_c_to_dirent(k);
d_inum = le64_to_cpu(d.v->d_inum);
ret = __bch2_dirent_read_target(&trans, d, ret = __bch2_dirent_read_target(trans, d,
&target_subvol, &target_subvol,
&target_snapshot, &target_snapshot,
&target_inum); &target_inum,
true);
if (ret && ret != -ENOENT) if (ret && ret != -ENOENT)
return ret; return ret;
ret = __lookup_inode(trans, d_inum, &target, &target_snapshot); if (fsck_err_on(ret, c,
if (ret && ret != -ENOENT) "dirent points to missing subvolume %llu",
return ret; le64_to_cpu(d.v->d_inum)))
have_target = !ret;
ret = 0;
if (fsck_err_on(!have_target, c,
"dirent points to missing inode:\n%s",
(bch2_bkey_val_to_text(&PBUF(buf), c,
k), buf)))
return remove_dirent(trans, d.k->p); return remove_dirent(trans, d.k->p);
if (!have_target) if (target_subvol) {
return 0; struct bch_inode_unpacked subvol_root;
if (!target.bi_dir && ret = __lookup_inode(trans, target_inum,
!target.bi_dir_offset) { &subvol_root, &target_snapshot);
target.bi_dir = k.k->p.inode; if (ret && ret != -ENOENT)
target.bi_dir_offset = k.k->p.offset; return ret;
ret = __write_inode(trans, &target, target_snapshot) ?: if (fsck_err_on(ret, c,
bch2_trans_commit(trans, NULL, NULL, "subvolume %u points to missing subvolume root %llu",
BTREE_INSERT_NOFAIL| target_subvol,
BTREE_INSERT_LAZY_RW); target_inum)) {
bch_err(c, "repair not implemented yet");
return -EINVAL;
}
if (fsck_err_on(subvol_root.bi_subvol != target_subvol, c,
"subvol root %llu has wrong bi_subvol field: got %u, should be %u",
target_inum,
subvol_root.bi_subvol, target_subvol)) {
subvol_root.bi_subvol = target_subvol;
ret = write_inode(trans, &subvol_root, target_snapshot);
if (ret) if (ret)
return ret; return ret;
return -EINTR;
} }
if (!inode_backpointer_matches(d, &target)) { ret = check_dirent_target(trans, iter, d, &subvol_root,
ret = inode_backpointer_exists(trans, &target); target_snapshot);
if (ret < 0) if (ret)
return ret;
} else {
ret = __get_visible_inodes(trans, target, s, target_inum);
if (ret)
return ret; return ret;
backpointer_exists = ret; if (fsck_err_on(!target->nr, c,
ret = 0; "dirent points to missing inode:\n%s",
(bch2_bkey_val_to_text(&PBUF(buf), c,
if (fsck_err_on(S_ISDIR(target.bi_mode) && k), buf))) {
backpointer_exists, c, ret = remove_dirent(trans, d.k->p);
"directory %llu with multiple links", if (ret)
target.bi_inum)) return ret;
return remove_dirent(trans, d.k->p);
if (fsck_err_on(backpointer_exists &&
!target.bi_nlink, c,
"inode %llu has multiple links but i_nlink 0",
d_inum)) {
target.bi_nlink++;
target.bi_flags &= ~BCH_INODE_UNLINKED;
ret = write_inode(trans, &target, target_snapshot);
return ret ?: -EINTR;
} }
if (fsck_err_on(!backpointer_exists, c, for (i = target->d; i < target->d + target->nr; i++) {
"inode %llu has wrong backpointer:\n" ret = check_dirent_target(trans, iter, d,
"got %llu:%llu\n" &i->inode, i->snapshot);
"should be %llu:%llu", if (ret)
d_inum, return ret;
target.bi_dir,
target.bi_dir_offset,
k.k->p.inode,
k.k->p.offset)) {
target.bi_dir = k.k->p.inode;
target.bi_dir_offset = k.k->p.offset;
ret = write_inode(trans, &target, target_snapshot);
return ret ?: -EINTR;
}
} }
target_subvol = d.v->d_type == DT_SUBVOL
? le64_to_cpu(d.v->d_inum) : 0;
if (fsck_err_on(target.bi_subvol != target_subvol, c,
"subvol root %llu has wrong subvol field:\n"
"got %u\n"
"should be %u",
target.bi_inum,
target.bi_subvol,
target_subvol)) {
target.bi_subvol = target_subvol;
ret = write_inode(trans, &target, target_snapshot);
return ret ?: -EINTR;
} }
if (fsck_err_on(vfs_d_type(d.v->d_type) != mode_to_type(target.bi_mode), c, if (d.v->d_type == DT_DIR)
"incorrect d_type: should be %u:\n%s", for_each_visible_inode(c, s, dir, d.k->p.snapshot, i)
mode_to_type(target.bi_mode), i->count++;
(bch2_bkey_val_to_text(&PBUF(buf), c,
k), buf))) {
struct bkey_i_dirent *n;
n = kmalloc(bkey_bytes(d.k), GFP_KERNEL);
if (!n)
return -ENOMEM;
bkey_reassemble(&n->k_i, d.s_c);
n->v.d_type = mode_to_type(target.bi_mode);
ret = __bch2_trans_do(trans, NULL, NULL,
BTREE_INSERT_NOFAIL|
BTREE_INSERT_LAZY_RW,
bch2_btree_iter_traverse(iter) ?:
bch2_trans_update(trans, iter, &n->k_i, 0));
kfree(n);
return ret ?: -EINTR;
}
*nr_subdirs += d.v->d_type == DT_DIR;
return 0;
fsck_err: fsck_err:
return ret; return ret;
} }
...@@ -989,31 +1598,39 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, ...@@ -989,31 +1598,39 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
noinline_for_stack noinline_for_stack
static int check_dirents(struct bch_fs *c) static int check_dirents(struct bch_fs *c)
{ {
struct inode_walker w = inode_walker_init(); struct inode_walker dir = inode_walker_init();
struct inode_walker target = inode_walker_init();
struct snapshots_seen s;
struct bch_hash_info hash_info; struct bch_hash_info hash_info;
struct btree_trans trans; struct btree_trans trans;
struct btree_iter iter; struct btree_iter iter;
unsigned nr_subdirs = 0;
int ret = 0; int ret = 0;
bch_verbose(c, "checking dirents"); bch_verbose(c, "checking dirents");
snapshots_seen_init(&s);
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
bch2_trans_iter_init(&trans, &iter, BTREE_ID_dirents, bch2_trans_iter_init(&trans, &iter, BTREE_ID_dirents,
POS(BCACHEFS_ROOT_INO, 0), POS(BCACHEFS_ROOT_INO, 0),
BTREE_ITER_INTENT| BTREE_ITER_INTENT|
BTREE_ITER_PREFETCH); BTREE_ITER_PREFETCH|
BTREE_ITER_ALL_SNAPSHOTS);
do { do {
ret = lockrestart_do(&trans, ret = lockrestart_do(&trans,
check_dirent(&trans, &iter, &hash_info, &w, &nr_subdirs)); check_dirent(&trans, &iter, &hash_info,
&dir, &target, &s));
if (ret) if (ret)
break; break;
} while (bch2_btree_iter_advance(&iter)); } while (bch2_btree_iter_advance(&iter));
bch2_trans_iter_exit(&trans, &iter); bch2_trans_iter_exit(&trans, &iter);
return bch2_trans_exit(&trans) ?: ret; bch2_trans_exit(&trans);
snapshots_seen_exit(&s);
inode_walker_exit(&dir);
inode_walker_exit(&target);
return ret;
} }
/* /*
...@@ -1036,15 +1653,22 @@ static int check_xattrs(struct bch_fs *c) ...@@ -1036,15 +1653,22 @@ static int check_xattrs(struct bch_fs *c)
bch2_trans_iter_init(&trans, &iter, BTREE_ID_xattrs, bch2_trans_iter_init(&trans, &iter, BTREE_ID_xattrs,
POS(BCACHEFS_ROOT_INO, 0), POS(BCACHEFS_ROOT_INO, 0),
BTREE_ITER_INTENT| BTREE_ITER_INTENT|
BTREE_ITER_PREFETCH); BTREE_ITER_PREFETCH|
BTREE_ITER_ALL_SNAPSHOTS);
retry: retry:
bch2_trans_begin(&trans);
while ((k = bch2_btree_iter_peek(&iter)).k && while ((k = bch2_btree_iter_peek(&iter)).k &&
!(ret = bkey_err(k))) { !(ret = bkey_err(k))) {
ret = walk_inode(&trans, &w, k.k->p.inode); ret = check_key_has_snapshot(&trans, &iter, k);
if (ret) if (ret)
break; break;
if (fsck_err_on(!w.have_inode, c, ret = walk_inode(&trans, &w, k.k->p);
if (ret < 0)
break;
if (fsck_err_on(ret == INT_MAX, c,
"xattr for missing inode %llu", "xattr for missing inode %llu",
k.k->p.inode)) { k.k->p.inode)) {
ret = bch2_btree_delete_at(&trans, &iter, 0); ret = bch2_btree_delete_at(&trans, &iter, 0);
...@@ -1053,14 +1677,18 @@ static int check_xattrs(struct bch_fs *c) ...@@ -1053,14 +1677,18 @@ static int check_xattrs(struct bch_fs *c)
continue; continue;
} }
if (w.first_this_inode && w.have_inode) if (ret == INT_MAX)
hash_info = bch2_hash_info_init(c, &w.inode); goto next;
ret = 0;
if (w.first_this_inode)
hash_info = bch2_hash_info_init(c, &w.d[0].inode);
ret = hash_check_key(&trans, bch2_xattr_hash_desc, ret = hash_check_key(&trans, bch2_xattr_hash_desc,
&hash_info, &iter, k); &hash_info, &iter, k);
if (ret) if (ret)
break; break;
next:
bch2_btree_iter_advance(&iter); bch2_btree_iter_advance(&iter);
} }
fsck_err: fsck_err:
...@@ -1072,40 +1700,63 @@ static int check_xattrs(struct bch_fs *c) ...@@ -1072,40 +1700,63 @@ static int check_xattrs(struct bch_fs *c)
} }
/* Get root directory, create if it doesn't exist: */ /* Get root directory, create if it doesn't exist: */
static int check_root(struct bch_fs *c, struct bch_inode_unpacked *root_inode) static int check_root(struct bch_fs *c)
{ {
struct bkey_inode_buf packed; struct btree_trans trans;
struct bch_inode_unpacked root_inode;
u32 snapshot; u32 snapshot;
u64 inum;
int ret; int ret;
bch2_trans_init(&trans, c, 0, 0);
bch_verbose(c, "checking root directory"); bch_verbose(c, "checking root directory");
ret = bch2_trans_do(c, NULL, NULL, 0, ret = subvol_lookup(&trans, BCACHEFS_ROOT_SUBVOL, &snapshot, &inum);
lookup_inode(&trans, BCACHEFS_ROOT_INO, root_inode, &snapshot));
if (ret && ret != -ENOENT) if (ret && ret != -ENOENT)
return ret; return ret;
if (fsck_err_on(ret, c, "root directory missing")) if (mustfix_fsck_err_on(ret, c, "root subvol missing")) {
goto create_root; struct bkey_i_subvolume root_subvol;
if (fsck_err_on(!S_ISDIR(root_inode->bi_mode), c, snapshot = U32_MAX;
"root inode not a directory")) inum = BCACHEFS_ROOT_INO;
goto create_root;
return 0; bkey_subvolume_init(&root_subvol.k_i);
fsck_err: root_subvol.k.p.offset = BCACHEFS_ROOT_SUBVOL;
root_subvol.v.flags = 0;
root_subvol.v.snapshot = cpu_to_le32(snapshot);
root_subvol.v.inode = cpu_to_le64(inum);
ret = __bch2_trans_do(&trans, NULL, NULL,
BTREE_INSERT_NOFAIL|
BTREE_INSERT_LAZY_RW,
__bch2_btree_insert(&trans, BTREE_ID_subvolumes, &root_subvol.k_i));
if (ret) {
bch_err(c, "error writing root subvol: %i", ret);
goto err;
}
}
ret = lookup_inode(&trans, BCACHEFS_ROOT_INO, &root_inode, &snapshot);
if (ret && ret != -ENOENT)
return ret; return ret;
create_root:
bch2_inode_init(c, root_inode, 0, 0, S_IFDIR|0755,
0, NULL);
root_inode->bi_inum = BCACHEFS_ROOT_INO;
bch2_inode_pack(c, &packed, root_inode); if (mustfix_fsck_err_on(ret, c, "root directory missing") ||
mustfix_fsck_err_on(!S_ISDIR(root_inode.bi_mode), c,
"root inode not a directory")) {
bch2_inode_init(c, &root_inode, 0, 0, S_IFDIR|0755,
0, NULL);
root_inode.bi_inum = inum;
return bch2_btree_insert(c, BTREE_ID_inodes, &packed.inode.k_i, ret = write_inode(&trans, &root_inode, snapshot);
NULL, NULL, if (ret)
BTREE_INSERT_NOFAIL| bch_err(c, "error writing root inode: %i", ret);
BTREE_INSERT_LAZY_RW); }
err:
fsck_err:
bch2_trans_exit(&trans);
return ret;
} }
struct pathbuf { struct pathbuf {
...@@ -1147,17 +1798,18 @@ static int check_path(struct btree_trans *trans, ...@@ -1147,17 +1798,18 @@ static int check_path(struct btree_trans *trans,
size_t i; size_t i;
int ret = 0; int ret = 0;
snapshot = snapshot_t(c, snapshot)->equiv;
p->nr = 0; p->nr = 0;
while (inode->bi_inum != BCACHEFS_ROOT_INO) { while (inode->bi_inum != BCACHEFS_ROOT_INO) {
ret = lockrestart_do(trans, ret = lockrestart_do(trans,
inode_backpointer_exists(trans, inode)); inode_backpointer_exists(trans, inode, snapshot));
if (ret < 0) if (ret < 0)
break; break;
if (!ret) { if (!ret) {
if (fsck_err(c, "unreachable inode %llu, type %u nlink %u backptr %llu:%llu", if (fsck_err(c, "unreachable inode %llu:%u, type %u nlink %u backptr %llu:%llu",
inode->bi_inum, inode->bi_inum, snapshot,
mode_to_type(inode->bi_mode), mode_to_type(inode->bi_mode),
inode->bi_nlink, inode->bi_nlink,
inode->bi_dir, inode->bi_dir,
...@@ -1226,7 +1878,8 @@ static int check_directory_structure(struct bch_fs *c) ...@@ -1226,7 +1878,8 @@ static int check_directory_structure(struct bch_fs *c)
for_each_btree_key(&trans, iter, BTREE_ID_inodes, POS_MIN, for_each_btree_key(&trans, iter, BTREE_ID_inodes, POS_MIN,
BTREE_ITER_INTENT| BTREE_ITER_INTENT|
BTREE_ITER_PREFETCH, k, ret) { BTREE_ITER_PREFETCH|
BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
if (k.k->type != KEY_TYPE_inode) if (k.k->type != KEY_TYPE_inode)
continue; continue;
...@@ -1237,6 +1890,9 @@ static int check_directory_structure(struct bch_fs *c) ...@@ -1237,6 +1890,9 @@ static int check_directory_structure(struct bch_fs *c)
break; break;
} }
if (u.bi_flags & BCH_INODE_UNLINKED)
continue;
ret = check_path(&trans, &path, &u, iter.pos.snapshot); ret = check_path(&trans, &path, &u, iter.pos.snapshot);
if (ret) if (ret)
break; break;
...@@ -1295,8 +1951,9 @@ static int nlink_cmp(const void *_l, const void *_r) ...@@ -1295,8 +1951,9 @@ static int nlink_cmp(const void *_l, const void *_r)
return cmp_int(l->inum, r->inum) ?: cmp_int(l->snapshot, r->snapshot); return cmp_int(l->inum, r->inum) ?: cmp_int(l->snapshot, r->snapshot);
} }
static void inc_link(struct bch_fs *c, struct nlink_table *links, static void inc_link(struct bch_fs *c, struct snapshots_seen *s,
u64 range_start, u64 range_end, u64 inum) struct nlink_table *links,
u64 range_start, u64 range_end, u64 inum, u32 snapshot)
{ {
struct nlink *link, key = { struct nlink *link, key = {
.inum = inum, .snapshot = U32_MAX, .inum = inum, .snapshot = U32_MAX,
...@@ -1307,8 +1964,18 @@ static void inc_link(struct bch_fs *c, struct nlink_table *links, ...@@ -1307,8 +1964,18 @@ static void inc_link(struct bch_fs *c, struct nlink_table *links,
link = __inline_bsearch(&key, links->d, links->nr, link = __inline_bsearch(&key, links->d, links->nr,
sizeof(links->d[0]), nlink_cmp); sizeof(links->d[0]), nlink_cmp);
if (link) if (!link)
return;
while (link > links->d && link[0].inum == link[-1].inum)
--link;
for (; link < links->d + links->nr && link->inum == inum; link++)
if (ref_visible(c, s, snapshot, link->snapshot)) {
link->count++; link->count++;
if (link->snapshot >= snapshot)
break;
}
} }
noinline_for_stack noinline_for_stack
...@@ -1328,7 +1995,8 @@ static int check_nlinks_find_hardlinks(struct bch_fs *c, ...@@ -1328,7 +1995,8 @@ static int check_nlinks_find_hardlinks(struct bch_fs *c,
for_each_btree_key(&trans, iter, BTREE_ID_inodes, for_each_btree_key(&trans, iter, BTREE_ID_inodes,
POS(0, start), POS(0, start),
BTREE_ITER_INTENT| BTREE_ITER_INTENT|
BTREE_ITER_PREFETCH, k, ret) { BTREE_ITER_PREFETCH|
BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
if (k.k->type != KEY_TYPE_inode) if (k.k->type != KEY_TYPE_inode)
continue; continue;
...@@ -1369,23 +2037,33 @@ static int check_nlinks_walk_dirents(struct bch_fs *c, struct nlink_table *links ...@@ -1369,23 +2037,33 @@ static int check_nlinks_walk_dirents(struct bch_fs *c, struct nlink_table *links
u64 range_start, u64 range_end) u64 range_start, u64 range_end)
{ {
struct btree_trans trans; struct btree_trans trans;
struct snapshots_seen s;
struct btree_iter iter; struct btree_iter iter;
struct bkey_s_c k; struct bkey_s_c k;
struct bkey_s_c_dirent d; struct bkey_s_c_dirent d;
int ret; int ret;
snapshots_seen_init(&s);
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
for_each_btree_key(&trans, iter, BTREE_ID_dirents, POS_MIN, for_each_btree_key(&trans, iter, BTREE_ID_dirents, POS_MIN,
BTREE_ITER_INTENT| BTREE_ITER_INTENT|
BTREE_ITER_PREFETCH, k, ret) { BTREE_ITER_PREFETCH|
BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
ret = snapshots_seen_update(c, &s, k.k->p);
if (ret)
break;
switch (k.k->type) { switch (k.k->type) {
case KEY_TYPE_dirent: case KEY_TYPE_dirent:
d = bkey_s_c_to_dirent(k); d = bkey_s_c_to_dirent(k);
if (d.v->d_type != DT_DIR) if (d.v->d_type != DT_DIR &&
inc_link(c, links, range_start, range_end, d.v->d_type != DT_SUBVOL)
le64_to_cpu(d.v->d_inum)); inc_link(c, &s, links, range_start, range_end,
le64_to_cpu(d.v->d_inum),
d.k->p.snapshot);
break; break;
} }
...@@ -1393,10 +2071,11 @@ static int check_nlinks_walk_dirents(struct bch_fs *c, struct nlink_table *links ...@@ -1393,10 +2071,11 @@ static int check_nlinks_walk_dirents(struct bch_fs *c, struct nlink_table *links
} }
bch2_trans_iter_exit(&trans, &iter); bch2_trans_iter_exit(&trans, &iter);
ret = bch2_trans_exit(&trans) ?: ret;
if (ret) if (ret)
bch_err(c, "error in fsck: btree error %i while walking dirents", ret); bch_err(c, "error in fsck: btree error %i while walking dirents", ret);
bch2_trans_exit(&trans);
snapshots_seen_exit(&s);
return ret; return ret;
} }
...@@ -1418,7 +2097,8 @@ static int check_nlinks_update_hardlinks(struct bch_fs *c, ...@@ -1418,7 +2097,8 @@ static int check_nlinks_update_hardlinks(struct bch_fs *c,
for_each_btree_key(&trans, iter, BTREE_ID_inodes, for_each_btree_key(&trans, iter, BTREE_ID_inodes,
POS(0, range_start), POS(0, range_start),
BTREE_ITER_INTENT| BTREE_ITER_INTENT|
BTREE_ITER_PREFETCH, k, ret) { BTREE_ITER_PREFETCH|
BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
if (k.k->p.offset >= range_end) if (k.k->p.offset >= range_end)
break; break;
...@@ -1434,7 +2114,8 @@ static int check_nlinks_update_hardlinks(struct bch_fs *c, ...@@ -1434,7 +2114,8 @@ static int check_nlinks_update_hardlinks(struct bch_fs *c,
if (!u.bi_nlink) if (!u.bi_nlink)
continue; continue;
while (link->inum < k.k->p.offset) { while ((cmp_int(link->inum, k.k->p.offset) ?:
cmp_int(link->snapshot, k.k->p.snapshot)) < 0) {
link++; link++;
BUG_ON(link >= links->d + links->nr); BUG_ON(link >= links->d + links->nr);
} }
...@@ -1507,14 +2188,13 @@ static int check_nlinks(struct bch_fs *c) ...@@ -1507,14 +2188,13 @@ static int check_nlinks(struct bch_fs *c)
*/ */
int bch2_fsck_full(struct bch_fs *c) int bch2_fsck_full(struct bch_fs *c)
{ {
struct bch_inode_unpacked root_inode;
return bch2_fs_snapshots_check(c) ?: return bch2_fs_snapshots_check(c) ?:
check_inodes(c, true) ?: check_inodes(c, true) ?:
check_subvols(c) ?:
check_extents(c) ?: check_extents(c) ?:
check_dirents(c) ?: check_dirents(c) ?:
check_xattrs(c) ?: check_xattrs(c) ?:
check_root(c, &root_inode) ?: check_root(c) ?:
check_directory_structure(c) ?: check_directory_structure(c) ?:
check_nlinks(c); check_nlinks(c);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment