Commit e751c01a authored by Kent Overstreet, committed by Kent Overstreet

bcachefs: Start using bpos.snapshot field

This patch starts treating the bpos.snapshot field like part of the key
in the btree code:

* bpos_successor() and bpos_predecessor() now include the snapshot field
* Keys in btrees that will be using snapshots (extents, inodes, dirents
  and xattrs) now always have their snapshot field set to U32_MAX

The btree iterator code gets a new flag, BTREE_ITER_ALL_SNAPSHOTS, that
determines whether we're iterating over keys in all snapshots or not -
internally, this controls whether bkey_(successor|predecessor)
increment/decrement the snapshot field, or only the higher bits of the
key.
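
For example (a sketch mirroring the bch2_gc_btree_gens() hunk in this
patch), walking every key in every snapshot means asking for the flag
explicitly; BTREE_ITER_NOT_EXTENTS is needed too, since extent-style
iteration and all-snapshots iteration are mutually exclusive:

	iter = bch2_trans_get_iter(&trans, btree_id, POS_MIN,
				   BTREE_ITER_PREFETCH|
				   BTREE_ITER_NOT_EXTENTS|
				   BTREE_ITER_ALL_SNAPSHOTS);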

We add a new member to struct btree_iter, iter->snapshot: when
BTREE_ITER_ALL_SNAPSHOTS is not set, iter->pos.snapshot should always
equal iter->snapshot, which will be 0 for btrees that don't use
snapshots, and always U32_MAX for btrees that will use snapshots
(until we enable snapshot creation).
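
Concretely, __bch2_trans_get_iter() establishes this invariant when the
iterator is created (quoting the hunk below):

	if (!(flags & BTREE_ITER_ALL_SNAPSHOTS))
		pos.snapshot = btree_type_has_snapshots(btree_id)
			? U32_MAX : 0;
	...
	iter->snapshot = pos.snapshot;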

This patch also introduces a new metadata version number, and compat
code for reading from/writing to older versions - this isn't a forced
upgrade (yet).
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent 4cf91b02
@@ -142,19 +142,18 @@ struct bpos {
 #define KEY_SNAPSHOT_MAX		((__u32)~0U)
 #define KEY_SIZE_MAX			((__u32)~0U)
 
-static inline struct bpos POS(__u64 inode, __u64 offset)
+static inline struct bpos SPOS(__u64 inode, __u64 offset, __u32 snapshot)
 {
-	struct bpos ret;
-
-	ret.inode	= inode;
-	ret.offset	= offset;
-	ret.snapshot	= 0;
-
-	return ret;
+	return (struct bpos) {
+		.inode		= inode,
+		.offset		= offset,
+		.snapshot	= snapshot,
+	};
 }
 
-#define POS_MIN			POS(0, 0)
-#define POS_MAX			POS(KEY_INODE_MAX, KEY_OFFSET_MAX)
+#define POS_MIN			SPOS(0, 0, 0)
+#define POS_MAX			SPOS(KEY_INODE_MAX, KEY_OFFSET_MAX, KEY_SNAPSHOT_MAX)
+#define POS(_inode, _offset)	SPOS(_inode, _offset, 0)
 
 /* Empty placeholder struct, for container_of() */
 struct bch_val {

@@ -1208,7 +1207,8 @@ enum bcachefs_metadata_version {
 	bcachefs_metadata_version_new_versioning	= 10,
 	bcachefs_metadata_version_bkey_renumber		= 10,
 	bcachefs_metadata_version_inode_btree_change	= 11,
-	bcachefs_metadata_version_max			= 12,
+	bcachefs_metadata_version_snapshot		= 12,
+	bcachefs_metadata_version_max			= 13,
 };
 
 #define bcachefs_metadata_version_current	(bcachefs_metadata_version_max - 1)

@@ -1749,7 +1749,7 @@ struct btree_node {
 	/* Closed interval: */
 	struct bpos		min_key;
 	struct bpos		max_key;
-	struct bch_extent_ptr	ptr;
+	struct bch_extent_ptr	_ptr; /* not used anymore */
 	struct bkey_format	format;
 
 	union {
...
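A quick illustration of the new helpers above (not part of the diff):
POS() keeps its old two-argument form by delegating to SPOS() with a zero
snapshot, while POS_MAX now saturates all three fields:

	struct bpos a = POS(1, 8);		/* == SPOS(1, 8, 0) */
	struct bpos b = SPOS(1, 8, U32_MAX);	/* snapshot set explicitly */
	/* POS_MAX == SPOS(KEY_INODE_MAX, KEY_OFFSET_MAX, KEY_SNAPSHOT_MAX) */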
@@ -617,15 +617,19 @@ const char *bch2_bkey_format_validate(struct bkey_format *f)
 		return "incorrect number of fields";
 
 	for (i = 0; i < f->nr_fields; i++) {
+		unsigned unpacked_bits = bch2_bkey_format_current.bits_per_field[i];
+		u64 unpacked_mask = ~((~0ULL << 1) << (unpacked_bits - 1));
 		u64 field_offset = le64_to_cpu(f->field_offset[i]);
 
-		if (f->bits_per_field[i] > 64)
+		if (f->bits_per_field[i] > unpacked_bits)
 			return "field too large";
 
-		if (field_offset &&
-		    (f->bits_per_field[i] == 64 ||
-		     (field_offset + ((1ULL << f->bits_per_field[i]) - 1) <
-		      field_offset)))
+		if ((f->bits_per_field[i] == unpacked_bits) && field_offset)
+			return "offset + bits overflow";
+
+		if (((field_offset + ((1ULL << f->bits_per_field[i]) - 1)) &
+		     unpacked_mask) <
+		    field_offset)
 			return "offset + bits overflow";
 
 		bits += f->bits_per_field[i];

@@ -1126,11 +1130,12 @@ void bch2_bkey_pack_test(void)
 	struct bkey_packed p;
 
 	struct bkey_format test_format = {
-		.key_u64s	= 2,
+		.key_u64s	= 3,
 		.nr_fields	= BKEY_NR_FIELDS,
 		.bits_per_field = {
 			13,
 			64,
+			32,
 		},
 	};
...
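The mask construction in bch2_bkey_format_validate() above deserves a
note: splitting the shift as (~0ULL << 1) << (unpacked_bits - 1) keeps the
shift count at most 63 even when unpacked_bits is 64, avoiding undefined
behavior. A worked example (illustrative):

	unsigned unpacked_bits = 32;
	u64 unpacked_mask = ~((~0ULL << 1) << (unpacked_bits - 1));
	/* (~0ULL << 1) << 31 == 0xffffffff00000000,
	 * so unpacked_mask == 0x00000000ffffffff */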
@@ -258,24 +258,46 @@ static inline unsigned bkey_format_key_bits(const struct bkey_format *format)
 		format->bits_per_field[BKEY_FIELD_SNAPSHOT];
 }
 
-static inline struct bpos bkey_successor(struct bpos p)
+static inline struct bpos bpos_successor(struct bpos p)
 {
-	struct bpos ret = p;
+	if (!++p.snapshot &&
+	    !++p.offset &&
+	    !++p.inode)
+		BUG();
 
-	if (!++ret.offset)
-		BUG_ON(!++ret.inode);
+	return p;
+}
 
-	return ret;
+static inline struct bpos bpos_predecessor(struct bpos p)
+{
+	if (!p.snapshot-- &&
+	    !p.offset-- &&
+	    !p.inode--)
+		BUG();
+
+	return p;
 }
 
-static inline struct bpos bkey_predecessor(struct bpos p)
+static inline struct bpos bpos_nosnap_successor(struct bpos p)
 {
-	struct bpos ret = p;
+	p.snapshot = 0;
 
-	if (!ret.offset--)
-		BUG_ON(!ret.inode--);
+	if (!++p.offset &&
+	    !++p.inode)
+		BUG();
 
-	return ret;
+	return p;
+}
+
+static inline struct bpos bpos_nosnap_predecessor(struct bpos p)
+{
+	p.snapshot = 0;
+
+	if (!p.offset-- &&
+	    !p.inode--)
+		BUG();
+
+	return p;
 }
 
 static inline u64 bkey_start_offset(const struct bkey *k)
...
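To illustrate the ordering above (snapshot is the least significant field,
inode the most significant):

	struct bpos p = SPOS(1, 8, U32_MAX);
	p = bpos_successor(p);		/* snapshot wraps, offset carries: SPOS(1, 9, 0) */
	p = bpos_nosnap_successor(p);	/* snapshot pinned to 0: SPOS(1, 10, 0) */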
@@ -119,9 +119,16 @@ const char *__bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k,
 			return "nonzero size field";
 	}
 
-	if (k.k->p.snapshot)
+	if (type != BKEY_TYPE_btree &&
+	    !btree_type_has_snapshots(type) &&
+	    k.k->p.snapshot)
 		return "nonzero snapshot";
 
+	if (type != BKEY_TYPE_btree &&
+	    btree_type_has_snapshots(type) &&
+	    k.k->p.snapshot != U32_MAX)
+		return "invalid snapshot field";
+
 	if (type != BKEY_TYPE_btree &&
 	    !bkey_cmp(k.k->p, POS_MAX))
 		return "POS_MAX key";

@@ -310,14 +317,15 @@ void __bch2_bkey_compat(unsigned level, enum btree_id btree_id,
 	const struct bkey_ops *ops;
 	struct bkey uk;
 	struct bkey_s u;
+	unsigned nr_compat = 5;
 	int i;
 
 	/*
 	 * Do these operations in reverse order in the write path:
 	 */
 
-	for (i = 0; i < 4; i++)
-	switch (!write ? i : 3 - i) {
+	for (i = 0; i < nr_compat; i++)
+	switch (!write ? i : nr_compat - 1 - i) {
 	case 0:
 		if (big_endian != CPU_BIG_ENDIAN)
 			bch2_bkey_swab_key(f, k);

@@ -351,6 +359,28 @@ void __bch2_bkey_compat(unsigned level, enum btree_id btree_id,
 		}
 		break;
 	case 3:
+		if (version < bcachefs_metadata_version_snapshot &&
+		    (level || btree_type_has_snapshots(btree_id))) {
+			struct bkey_i *u = packed_to_bkey(k);
+
+			if (u) {
+				u->k.p.snapshot = write
+					? 0 : U32_MAX;
+			} else {
+				u64 min_packed = f->field_offset[BKEY_FIELD_SNAPSHOT];
+				u64 max_packed = min_packed +
+					~(~0ULL << f->bits_per_field[BKEY_FIELD_SNAPSHOT]);
+
+				uk = __bch2_bkey_unpack_key(f, k);
+				uk.p.snapshot = write
+					? min_packed : min_t(u64, U32_MAX, max_packed);
+
+				BUG_ON(!bch2_bkey_pack_key(k, &uk, f));
+			}
+		}
+		break;
+	case 4:
 		if (!bkey_packed(k)) {
 			u = bkey_i_to_s(packed_to_bkey(k));
 		} else {
...
@@ -1438,7 +1438,7 @@ static void btree_node_iter_init_pack_failed(struct btree_node_iter *iter,
  * to the search key is going to have 0 sectors after the search key.
  *
  * But this does mean that we can't just search for
- * bkey_successor(start_of_range) to get the first extent that overlaps with
+ * bpos_successor(start_of_range) to get the first extent that overlaps with
  * the range we want - if we're unlucky and there's an extent that ends
  * exactly where we searched, then there could be a deleted key at the same
  * position and we'd get that when we search instead of the preceding extent
...
@@ -1018,7 +1018,7 @@ struct btree *bch2_btree_node_get_sibling(struct bch_fs *c,
 		if (sib != btree_prev_sib)
 			swap(n1, n2);
 
-		if (bpos_cmp(bkey_successor(n1->key.k.p),
+		if (bpos_cmp(bpos_successor(n1->key.k.p),
 			     n2->data->min_key)) {
 			char buf1[200], buf2[200];
...
@@ -64,7 +64,7 @@ static int bch2_gc_check_topology(struct bch_fs *c,
 	struct bpos node_end = b->data->max_key;
 	struct bpos expected_start = bkey_deleted(&prev->k->k)
 		? node_start
-		: bkey_successor(prev->k->k.p);
+		: bpos_successor(prev->k->k.p);
 	char buf1[200], buf2[200];
 	bool update_min = false;
 	bool update_max = false;

@@ -1187,7 +1187,9 @@ static int bch2_gc_btree_gens(struct bch_fs *c, enum btree_id btree_id)
 	bch2_trans_init(&trans, c, 0, 0);
 
 	iter = bch2_trans_get_iter(&trans, btree_id, POS_MIN,
-				   BTREE_ITER_PREFETCH);
+				   BTREE_ITER_PREFETCH|
+				   BTREE_ITER_NOT_EXTENTS|
+				   BTREE_ITER_ALL_SNAPSHOTS);
 
 	while ((k = bch2_btree_iter_peek(iter)).k &&
 	       !(ret = bkey_err(k))) {

@@ -1405,7 +1407,7 @@ static void bch2_coalesce_nodes(struct bch_fs *c, struct btree_iter *iter,
 		n1->key.k.p = n1->data->max_key =
 			bkey_unpack_pos(n1, last);
 
-		n2->data->min_key = bkey_successor(n1->data->max_key);
+		n2->data->min_key = bpos_successor(n1->data->max_key);
 
 		memcpy_u64s(vstruct_last(s1),
 			    s2->start, u64s);
...
@@ -612,12 +612,6 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca,
 		     BTREE_ERR_MUST_RETRY, c, ca, b, i,
 		     "incorrect level");
 
-	if (BSET_BIG_ENDIAN(i) != CPU_BIG_ENDIAN) {
-		u64 *p = (u64 *) &bn->ptr;
-
-		*p = swab64(*p);
-	}
-
 	if (!write)
 		compat_btree_node(b->c.level, b->c.btree_id, version,
 				  BSET_BIG_ENDIAN(i), write, bn);

@@ -1328,8 +1322,8 @@ static int validate_bset_for_write(struct bch_fs *c, struct btree *b,
 	if (bch2_bkey_invalid(c, bkey_i_to_s_c(&b->key), BKEY_TYPE_btree))
 		return -1;
 
-	ret = validate_bset(c, NULL, b, i, sectors, WRITE, false) ?:
-		validate_bset_keys(c, b, i, &whiteout_u64s, WRITE, false);
+	ret = validate_bset_keys(c, b, i, &whiteout_u64s, WRITE, false) ?:
+		validate_bset(c, NULL, b, i, sectors, WRITE, false);
 	if (ret) {
 		bch2_inconsistent_error(c);
 		dump_stack();

@@ -1482,7 +1476,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
 		validate_before_checksum = true;
 
 	/* validate_bset will be modifying: */
-	if (le16_to_cpu(i->version) <= bcachefs_metadata_version_inode_btree_change)
+	if (le16_to_cpu(i->version) < bcachefs_metadata_version_current)
 		validate_before_checksum = true;
 
 	/* if we're going to be encrypting, check metadata validity first: */
...
@@ -189,8 +189,8 @@ void bch2_btree_flush_all_writes(struct bch_fs *);
 void bch2_dirty_btree_nodes_to_text(struct printbuf *, struct bch_fs *);
 
 static inline void compat_bformat(unsigned level, enum btree_id btree_id,
				   unsigned version, unsigned big_endian,
				   int write, struct bkey_format *f)
 {
 	if (version < bcachefs_metadata_version_inode_btree_change &&
 	    btree_id == BTREE_ID_inodes) {

@@ -199,6 +199,16 @@ static inline void compat_bformat(unsigned level, enum btree_id btree_id,
 		swap(f->field_offset[BKEY_FIELD_INODE],
 		     f->field_offset[BKEY_FIELD_OFFSET]);
 	}
+
+	if (version < bcachefs_metadata_version_snapshot &&
+	    (level || btree_type_has_snapshots(btree_id))) {
+		u64 max_packed =
+			~(~0ULL << f->bits_per_field[BKEY_FIELD_SNAPSHOT]);
+
+		f->field_offset[BKEY_FIELD_SNAPSHOT] = write
+			? 0
+			: U32_MAX - max_packed;
+	}
 }
 
 static inline void compat_bpos(unsigned level, enum btree_id btree_id,

@@ -222,16 +232,24 @@ static inline void compat_btree_node(unsigned level, enum btree_id btree_id,
 	    btree_node_type_is_extents(btree_id) &&
 	    bpos_cmp(bn->min_key, POS_MIN) &&
 	    write)
-		bn->min_key = bkey_predecessor(bn->min_key);
+		bn->min_key = bpos_nosnap_predecessor(bn->min_key);
+
+	if (version < bcachefs_metadata_version_snapshot &&
+	    write)
+		bn->max_key.snapshot = 0;
 
 	compat_bpos(level, btree_id, version, big_endian, write, &bn->min_key);
 	compat_bpos(level, btree_id, version, big_endian, write, &bn->max_key);
 
+	if (version < bcachefs_metadata_version_snapshot &&
+	    !write)
+		bn->max_key.snapshot = U32_MAX;
+
 	if (version < bcachefs_metadata_version_inode_btree_change &&
 	    btree_node_type_is_extents(btree_id) &&
 	    bpos_cmp(bn->min_key, POS_MIN) &&
 	    !write)
-		bn->min_key = bkey_successor(bn->min_key);
+		bn->min_key = bpos_nosnap_successor(bn->min_key);
 }
 
 #endif /* _BCACHEFS_BTREE_IO_H */
...
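A worked example of the compat_bformat() snapshot fixup above (assuming,
as is typical for pre-snapshot nodes, a format that packs the snapshot
field to zero bits):

	/* bits_per_field[BKEY_FIELD_SNAPSHOT] == 0, so: */
	u64 max_packed = ~(~0ULL << 0);		/* == 0 */
	/* read:  field_offset[BKEY_FIELD_SNAPSHOT] = U32_MAX - 0 == U32_MAX,
	 *        so every key unpacks with snapshot == U32_MAX
	 * write: field_offset[BKEY_FIELD_SNAPSHOT] = 0,
	 *        so old-version readers keep seeing snapshot == 0 */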
@@ -18,6 +18,36 @@
 
 static void btree_iter_set_search_pos(struct btree_iter *, struct bpos);
 
+static inline struct bpos bkey_successor(struct btree_iter *iter, struct bpos p)
+{
+	EBUG_ON(btree_iter_type(iter) == BTREE_ITER_NODES);
+
+	/* Are we iterating over keys in all snapshots? */
+	if (iter->flags & BTREE_ITER_ALL_SNAPSHOTS) {
+		p = bpos_successor(p);
+	} else {
+		p = bpos_nosnap_successor(p);
+		p.snapshot = iter->snapshot;
+	}
+
+	return p;
+}
+
+static inline struct bpos bkey_predecessor(struct btree_iter *iter, struct bpos p)
+{
+	EBUG_ON(btree_iter_type(iter) == BTREE_ITER_NODES);
+
+	/* Are we iterating over keys in all snapshots? */
+	if (iter->flags & BTREE_ITER_ALL_SNAPSHOTS) {
+		p = bpos_predecessor(p);
+	} else {
+		p = bpos_nosnap_predecessor(p);
+		p.snapshot = iter->snapshot;
+	}
+
+	return p;
+}
+
 static inline bool is_btree_node(struct btree_iter *iter, unsigned l)
 {
 	return l < BTREE_MAX_DEPTH &&

@@ -30,7 +60,7 @@ static inline struct bpos btree_iter_search_key(struct btree_iter *iter)
 
 	if ((iter->flags & BTREE_ITER_IS_EXTENTS) &&
 	    bkey_cmp(pos, POS_MAX))
-		pos = bkey_successor(pos);
+		pos = bkey_successor(iter, pos);
 
 	return pos;
 }

@@ -591,10 +621,24 @@ static void bch2_btree_iter_verify_level(struct btree_iter *iter,
 
 static void bch2_btree_iter_verify(struct btree_iter *iter)
 {
+	enum btree_iter_type type = btree_iter_type(iter);
 	unsigned i;
 
 	EBUG_ON(iter->btree_id >= BTREE_ID_NR);
 
+	BUG_ON(!(iter->flags & BTREE_ITER_ALL_SNAPSHOTS) &&
+	       iter->pos.snapshot != iter->snapshot);
+
+	BUG_ON((iter->flags & BTREE_ITER_IS_EXTENTS) &&
+	       (iter->flags & BTREE_ITER_ALL_SNAPSHOTS));
+
+	BUG_ON(type == BTREE_ITER_NODES &&
+	       !(iter->flags & BTREE_ITER_ALL_SNAPSHOTS));
+
+	BUG_ON(type != BTREE_ITER_NODES &&
+	       (iter->flags & BTREE_ITER_ALL_SNAPSHOTS) &&
+	       !btree_type_has_snapshots(iter->btree_id));
+
 	bch2_btree_iter_verify_locks(iter);
 
 	for (i = 0; i < BTREE_MAX_DEPTH; i++)

@@ -605,6 +649,9 @@ static void bch2_btree_iter_verify_entry_exit(struct btree_iter *iter)
 {
 	enum btree_iter_type type = btree_iter_type(iter);
 
+	BUG_ON(!(iter->flags & BTREE_ITER_ALL_SNAPSHOTS) &&
+	       iter->pos.snapshot != iter->snapshot);
+
 	BUG_ON((type == BTREE_ITER_KEYS ||
 		type == BTREE_ITER_CACHED) &&
 	       (bkey_cmp(iter->pos, bkey_start_pos(&iter->k)) < 0 ||

@@ -1434,7 +1481,7 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter)
 		 * Haven't gotten to the end of the parent node: go back down to
 		 * the next child node
 		 */
-		btree_iter_set_search_pos(iter, bkey_successor(iter->pos));
+		btree_iter_set_search_pos(iter, bpos_successor(iter->pos));
 
 		/* Unlock to avoid screwing up our lock invariants: */
 		btree_node_unlock(iter, iter->level);

@@ -1508,7 +1555,7 @@ inline bool bch2_btree_iter_advance(struct btree_iter *iter)
 	bool ret = bpos_cmp(pos, POS_MAX) != 0;
 
 	if (ret && !(iter->flags & BTREE_ITER_IS_EXTENTS))
-		pos = bkey_successor(pos);
+		pos = bkey_successor(iter, pos);
 	bch2_btree_iter_set_pos(iter, pos);
 	return ret;
 }

@@ -1519,7 +1566,7 @@ inline bool bch2_btree_iter_rewind(struct btree_iter *iter)
 	bool ret = bpos_cmp(pos, POS_MIN) != 0;
 
 	if (ret && !(iter->flags & BTREE_ITER_IS_EXTENTS))
-		pos = bkey_predecessor(pos);
+		pos = bkey_predecessor(iter, pos);
 	bch2_btree_iter_set_pos(iter, pos);
 	return ret;
 }

@@ -1535,7 +1582,7 @@ static inline bool btree_iter_set_pos_to_next_leaf(struct btree_iter *iter)
 	 * btree, in that case we want iter->pos to reflect that:
 	 */
 	if (ret)
-		btree_iter_set_search_pos(iter, bkey_successor(next_pos));
+		btree_iter_set_search_pos(iter, bpos_successor(next_pos));
 	else
 		bch2_btree_iter_set_pos(iter, POS_MAX);

@@ -1548,7 +1595,7 @@ static inline bool btree_iter_set_pos_to_prev_leaf(struct btree_iter *iter)
 	bool ret = bpos_cmp(next_pos, POS_MIN) != 0;
 
 	if (ret)
-		btree_iter_set_search_pos(iter, bkey_predecessor(next_pos));
+		btree_iter_set_search_pos(iter, bpos_predecessor(next_pos));
 	else
 		bch2_btree_iter_set_pos(iter, POS_MIN);

@@ -1594,13 +1641,13 @@ static inline struct bkey_s_c __btree_iter_peek(struct btree_iter *iter, bool wi
 		k = btree_iter_level_peek(iter, &iter->l[0]);
 
 		if (next_update &&
-		    bkey_cmp(next_update->k.p, iter->real_pos) <= 0)
+		    bpos_cmp(next_update->k.p, iter->real_pos) <= 0)
 			k = bkey_i_to_s_c(next_update);
 
 		if (likely(k.k)) {
 			if (bkey_deleted(k.k)) {
 				btree_iter_set_search_pos(iter,
-					bkey_successor(k.k->p));
+					bkey_successor(iter, k.k->p));
 				continue;
 			}

@@ -1739,7 +1786,7 @@ __bch2_btree_iter_peek_slot_extents(struct btree_iter *iter)
 		if (iter->pos.inode == KEY_INODE_MAX)
 			return bkey_s_c_null;
 
-		bch2_btree_iter_set_pos(iter, bkey_successor(iter->pos));
+		bch2_btree_iter_set_pos(iter, bkey_successor(iter, iter->pos));
 	}
 
 	pos = iter->pos;

@@ -1973,6 +2020,14 @@ struct btree_iter *__bch2_trans_get_iter(struct btree_trans *trans,
 {
 	struct btree_iter *iter, *best = NULL;
 
+	if ((flags & BTREE_ITER_TYPE) != BTREE_ITER_NODES &&
+	    !btree_type_has_snapshots(btree_id))
+		flags &= ~BTREE_ITER_ALL_SNAPSHOTS;
+
+	if (!(flags & BTREE_ITER_ALL_SNAPSHOTS))
+		pos.snapshot = btree_type_has_snapshots(btree_id)
+			? U32_MAX : 0;
+
 	/* We always want a fresh iterator for node iterators: */
 	if ((flags & BTREE_ITER_TYPE) == BTREE_ITER_NODES)
 		goto alloc_iter;

@@ -2007,11 +2062,14 @@ struct btree_iter *__bch2_trans_get_iter(struct btree_trans *trans,
 	if ((flags & BTREE_ITER_TYPE) != BTREE_ITER_NODES &&
 	    btree_node_type_is_extents(btree_id) &&
-	    !(flags & BTREE_ITER_NOT_EXTENTS))
+	    !(flags & BTREE_ITER_NOT_EXTENTS) &&
+	    !(flags & BTREE_ITER_ALL_SNAPSHOTS))
 		flags |= BTREE_ITER_IS_EXTENTS;
 
 	iter->flags = flags;
 
+	iter->snapshot = pos.snapshot;
+
 	if (!(iter->flags & BTREE_ITER_INTENT))
 		bch2_btree_iter_downgrade(iter);
 	else if (!iter->locks_want)

@@ -2034,6 +2092,7 @@ struct btree_iter *bch2_trans_get_node_iter(struct btree_trans *trans,
 		__bch2_trans_get_iter(trans, btree_id, pos,
 				      BTREE_ITER_NODES|
 				      BTREE_ITER_NOT_EXTENTS|
+				      BTREE_ITER_ALL_SNAPSHOTS|
 				      flags);
 	unsigned i;
...
@@ -172,6 +172,9 @@ bool bch2_btree_iter_rewind(struct btree_iter *);
 
 static inline void bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos new_pos)
 {
+	if (!(iter->flags & BTREE_ITER_ALL_SNAPSHOTS))
+		new_pos.snapshot = iter->snapshot;
+
 	bkey_init(&iter->k);
 	iter->k.p = iter->pos = new_pos;
 }
...
@@ -216,6 +216,7 @@ enum btree_iter_type {
 #define BTREE_ITER_CACHED_NOFILL	(1 << 9)
 #define BTREE_ITER_CACHED_NOCREATE	(1 << 10)
 #define BTREE_ITER_NOT_EXTENTS		(1 << 11)
+#define BTREE_ITER_ALL_SNAPSHOTS	(1 << 12)
 
 enum btree_iter_uptodate {
 	BTREE_ITER_UPTODATE		= 0,

@@ -245,6 +246,8 @@ struct btree_iter {
 	/* what we're searching for/what the iterator actually points to: */
 	struct bpos		real_pos;
 	struct bpos		pos_after_commit;
+	/* When we're filtering by snapshot, the snapshot ID we're looking for: */
+	unsigned		snapshot;
 
 	u16			flags;
 	u8			idx;

@@ -329,7 +332,7 @@ struct bkey_cached {
 struct btree_insert_entry {
 	unsigned		trigger_flags;
 	u8			bkey_type;
-	u8			btree_id;
+	enum btree_id		btree_id:8;
 	u8			level;
 	unsigned		trans_triggers_run:1;
 	unsigned		is_extent:1;

@@ -610,6 +613,17 @@ static inline bool btree_iter_is_extents(struct btree_iter *iter)
 	(BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS|		\
 	 BTREE_NODE_TYPE_HAS_MEM_TRIGGERS)
 
+#define BTREE_ID_HAS_SNAPSHOTS				\
+	((1U << BTREE_ID_extents)|			\
+	 (1U << BTREE_ID_inodes)|			\
+	 (1U << BTREE_ID_dirents)|			\
+	 (1U << BTREE_ID_xattrs))
+
+static inline bool btree_type_has_snapshots(enum btree_id id)
+{
+	return (1 << id) & BTREE_ID_HAS_SNAPSHOTS;
+}
+
 enum btree_trigger_flags {
 	__BTREE_TRIGGER_NORUN,		/* Don't run triggers at all */
...
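Usage of the new helper above is straightforward (illustrative;
BTREE_ID_alloc stands in for any btree outside the mask):

	btree_type_has_snapshots(BTREE_ID_extents);	/* true */
	btree_type_has_snapshots(BTREE_ID_alloc);	/* false */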
@@ -69,7 +69,7 @@ static void btree_node_interior_verify(struct bch_fs *c, struct btree *b)
 			break;
 		}
 
-		next_node = bkey_successor(k.k->p);
+		next_node = bpos_successor(k.k->p);
 	}
 #endif
 }

@@ -289,7 +289,6 @@ static struct btree *bch2_btree_node_alloc(struct btree_update *as, unsigned lev
 	b->data->flags = 0;
 	SET_BTREE_NODE_ID(b->data, as->btree_id);
 	SET_BTREE_NODE_LEVEL(b->data, level);
-	b->data->ptr = bch2_bkey_ptrs_c(bkey_i_to_s_c(&b->key)).start->ptr;
 
 	if (b->key.k.type == KEY_TYPE_btree_ptr_v2) {
 		struct bkey_i_btree_ptr_v2 *bp = bkey_i_to_btree_ptr_v2(&b->key);

@@ -1100,6 +1099,7 @@ static struct btree *__btree_split_node(struct btree_update *as,
 	struct btree *n2;
 	struct bset *set1, *set2;
 	struct bkey_packed *k, *set2_start, *set2_end, *out, *prev = NULL;
+	struct bpos n1_pos;
 
 	n2 = bch2_btree_node_alloc(as, n1->c.level);
 	bch2_btree_update_add_new_node(as, n2);

@@ -1146,8 +1146,12 @@ static struct btree *__btree_split_node(struct btree_update *as,
 	n1->nr.packed_keys	= nr_packed;
 	n1->nr.unpacked_keys	= nr_unpacked;
 
-	btree_set_max(n1, bkey_unpack_pos(n1, prev));
-	btree_set_min(n2, bkey_successor(n1->key.k.p));
+	n1_pos = bkey_unpack_pos(n1, prev);
+	if (as->c->sb.version < bcachefs_metadata_version_snapshot)
+		n1_pos.snapshot = U32_MAX;
+
+	btree_set_max(n1, n1_pos);
+	btree_set_min(n2, bpos_successor(n1->key.k.p));
 
 	bch2_bkey_format_init(&s);
 	bch2_bkey_format_add_pos(&s, n2->data->min_key);
...
@@ -223,9 +223,17 @@ static inline void btree_insert_entry_checks(struct btree_trans *trans,
 {
 	struct bch_fs *c = trans->c;
 
-	BUG_ON(bch2_debug_check_bkeys &&
-	       bch2_bkey_invalid(c, bkey_i_to_s_c(i->k), i->bkey_type));
-	BUG_ON(bpos_cmp(i->k->k.p, i->iter->real_pos));
+	if (bch2_debug_check_bkeys) {
+		const char *invalid = bch2_bkey_invalid(c,
+				bkey_i_to_s_c(i->k), i->bkey_type);
+		if (invalid) {
+			char buf[200];
+
+			bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(i->k));
+			panic("invalid bkey %s on insert: %s\n", buf, invalid);
+		}
+	}
+	BUG_ON(!i->is_extent && bpos_cmp(i->k->k.p, i->iter->real_pos));
 	BUG_ON(i->level		!= i->iter->level);
 	BUG_ON(i->btree_id	!= i->iter->btree_id);
 }
...
@@ -222,7 +222,9 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf,
 
 	bch2_trans_init(&trans, i->c, 0, 0);
 
-	iter = bch2_trans_get_iter(&trans, i->id, i->from, BTREE_ITER_PREFETCH);
+	iter = bch2_trans_get_iter(&trans, i->id, i->from,
+				   BTREE_ITER_PREFETCH|
+				   BTREE_ITER_ALL_SNAPSHOTS);
 	k = bch2_btree_iter_peek(iter);
 
 	while (k.k && !(err = bkey_err(k))) {

@@ -290,7 +292,7 @@ static ssize_t bch2_read_btree_formats(struct file *file, char __user *buf,
 	 * all nodes, meh
 	 */
 	i->from = bpos_cmp(POS_MAX, b->key.k.p)
-		? bkey_successor(b->key.k.p)
+		? bpos_successor(b->key.k.p)
 		: b->key.k.p;
 
 	if (!i->size)
...
@@ -179,7 +179,8 @@ const char *bch2_btree_ptr_v2_invalid(const struct bch_fs *c, struct bkey_s_c k)
 	if (bkey_val_u64s(k.k) > BKEY_BTREE_PTR_VAL_U64s_MAX)
 		return "value too big";
 
-	if (bp.v->min_key.snapshot)
+	if (c->sb.version < bcachefs_metadata_version_snapshot &&
+	    bp.v->min_key.snapshot)
 		return "invalid min_key.snapshot";
 
 	return bch2_bkey_ptrs_invalid(c, k);

@@ -211,8 +212,8 @@ void bch2_btree_ptr_v2_compat(enum btree_id btree_id, unsigned version,
 	    btree_node_type_is_extents(btree_id) &&
 	    bkey_cmp(bp.v->min_key, POS_MIN))
 		bp.v->min_key = write
-			? bkey_predecessor(bp.v->min_key)
-			: bkey_successor(bp.v->min_key);
+			? bpos_nosnap_predecessor(bp.v->min_key)
+			: bpos_nosnap_successor(bp.v->min_key);
 }
 
 /* KEY_TYPE_extent: */
...
@@ -1318,6 +1318,7 @@ static int check_inode(struct btree_trans *trans,
 		struct bkey_inode_buf p;
 
 		bch2_inode_pack(c, &p, &u);
+		p.inode.k.p = iter->pos;
 
 		ret = __bch2_trans_do(trans, NULL, NULL,
 				      BTREE_INSERT_NOFAIL|
...
@@ -332,6 +332,7 @@ int bch2_inode_write(struct btree_trans *trans,
 		return PTR_ERR(inode_p);
 
 	bch2_inode_pack(trans->c, inode_p, inode);
+	inode_p->inode.k.p.snapshot = iter->snapshot;
 	bch2_trans_update(trans, iter, &inode_p->inode.k_i, 0);
 	return 0;
 }
...
@@ -332,6 +332,9 @@ int bch2_extent_update(struct btree_trans *trans,
 
 	if (i_sectors_delta || new_i_size) {
 		bch2_inode_pack(trans->c, &inode_p, &inode_u);
+
+		inode_p.inode.k.p.snapshot = iter->snapshot;
+
 		bch2_trans_update(trans, inode_iter,
 				  &inode_p.inode.k_i, 0);
 	}

@@ -447,6 +450,8 @@ int bch2_write_index_default(struct bch_write_op *op)
 
 		k = bch2_keylist_front(keys);
 
+		k->k.p.snapshot = iter->snapshot;
+
 		bch2_bkey_buf_realloc(&sk, c, k->k.u64s);
 		bkey_copy(sk.k, k);
 		bch2_cut_front(iter->pos, sk.k);
...
@@ -1449,7 +1449,7 @@ void bch2_journal_write(struct closure *cl)
 	if (bch2_csum_type_is_encryption(JSET_CSUM_TYPE(jset)))
 		validate_before_checksum = true;
 
-	if (le32_to_cpu(jset->version) <= bcachefs_metadata_version_inode_btree_change)
+	if (le32_to_cpu(jset->version) < bcachefs_metadata_version_current)
 		validate_before_checksum = true;
 
 	if (validate_before_checksum &&
...
@@ -998,6 +998,13 @@ int bch2_fs_recovery(struct bch_fs *c)
 		goto err;
 	}
 
+	if (!(c->sb.compat & (1ULL << BCH_COMPAT_bformat_overflow_done))) {
+		bch_err(c, "filesystem may have incompatible bkey formats; run fsck from the compat branch to fix");
+		ret = -EINVAL;
+		goto err;
+	}
+
 	if (!(c->sb.features & (1ULL << BCH_FEATURE_alloc_v2))) {
 		bch_info(c, "alloc_v2 feature bit not set, fsck required");
 		c->opts.fsck = true;

@@ -1340,6 +1347,7 @@ int bch2_fs_initialize(struct bch_fs *c)
 			  S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO, 0, NULL);
 	root_inode.bi_inum = BCACHEFS_ROOT_INO;
 	bch2_inode_pack(c, &packed_inode, &root_inode);
+	packed_inode.inode.k.p.snapshot = U32_MAX;
 
 	err = "error creating root directory";
 	ret = bch2_btree_insert(c, BTREE_ID_inodes,
...
@@ -483,6 +483,7 @@ static int rand_insert(struct bch_fs *c, u64 nr)
 	for (i = 0; i < nr; i++) {
 		bkey_cookie_init(&k.k_i);
 		k.k.p.offset = test_rand();
+		k.k.p.snapshot = U32_MAX;
 
 		ret = __bch2_trans_do(&trans, NULL, NULL, 0,
 			__bch2_btree_insert(&trans, BTREE_ID_xattrs, &k.k_i));
...