Commit 79203111 authored by Kent Overstreet

bcachefs: Unwritten extents support

 - bch2_extent_merge checks unwritten bit
 - read path returns 0s for unwritten extents without actually reading
 - reflink path skips over unwritten extents
 - bch2_bkey_ptrs_invalid() checks for extents with both written and
   unwritten ptrs, and for non-normal extents (stripes, btree ptrs) with
   unwritten ptrs
 - fiemap checks for unwritten extents and returns
   FIEMAP_EXTENT_UNWRITTEN
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent 2f1f7fe9
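For context on what this buys at the filesystem interface, here is a minimal userspace sketch (illustration only, not part of the patch): it preallocates a range with fallocate() and then asks fiemap how the range is mapped. Assuming bcachefs now backs preallocated ranges with unwritten extents, the range should be reported with FIEMAP_EXTENT_UNWRITTEN, and reads from it return zeros. The file name "testfile" and the 1 MiB size are arbitrary.

/*
 * Hypothetical check of the userspace-visible behaviour added by this
 * commit: preallocate, then inspect the extent map via FS_IOC_FIEMAP.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>
#include <linux/fiemap.h>

int main(void)
{
	int fd = open("testfile", O_RDWR | O_CREAT | O_TRUNC, 0644);
	if (fd < 0 || fallocate(fd, 0, 0, 1 << 20)) {
		perror("fallocate");
		return 1;
	}

	/* fiemap header plus room for a single extent record: */
	struct fiemap *fm = calloc(1, sizeof(*fm) + sizeof(struct fiemap_extent));
	fm->fm_length		= 1 << 20;
	fm->fm_extent_count	= 1;
	fm->fm_flags		= FIEMAP_FLAG_SYNC;

	if (ioctl(fd, FS_IOC_FIEMAP, fm)) {
		perror("FS_IOC_FIEMAP");
		return 1;
	}

	if (fm->fm_mapped_extents &&
	    (fm->fm_extents[0].fe_flags & FIEMAP_EXTENT_UNWRITTEN))
		printf("preallocated range is unwritten\n");

	free(fm);
	close(fd);
	return 0;
}

The zero-fill behaviour on reads comes from the read-path change below: bch2_bkey_pick_read_device() returns 0 for a pointer with the unwritten bit set, so the extent is treated as a hole and no device read is issued.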
@@ -582,7 +582,7 @@ struct bch_extent_ptr {
 	__u64			type:1,
 				cached:1,
 				unused:1,
-				reservation:1,
+				unwritten:1,
 				offset:44, /* 8 petabytes */
 				dev:8,
 				gen:8;
@@ -590,7 +590,7 @@ struct bch_extent_ptr {
 	__u64			gen:8,
 				dev:8,
 				offset:44,
-				reservation:1,
+				unwritten:1,
 				unused:1,
 				cached:1,
 				type:1;
...
@@ -116,6 +116,13 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k,
 		return -EIO;
 
 	bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
+		/*
+		 * Unwritten extent: no need to actually read, treat it as a
+		 * hole and return 0s:
+		 */
+		if (p.ptr.unwritten)
+			return 0;
+
 		ca = bch_dev_bkey_exists(c, p.ptr.dev);
 
 		/*
@@ -269,6 +276,7 @@ bool bch2_extent_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_c r)
 		    rp.ptr.offset + rp.crc.offset ||
 		    lp.ptr.dev != rp.ptr.dev ||
 		    lp.ptr.gen != rp.ptr.gen ||
+		    lp.ptr.unwritten != rp.ptr.unwritten ||
 		    lp.has_ec != rp.has_ec)
			return false;
 
@@ -904,6 +912,9 @@ bool bch2_extents_match(struct bkey_s_c k1, struct bkey_s_c k2)
 		const union bch_extent_entry *entry1, *entry2;
 		struct extent_ptr_decoded p1, p2;
 
+		if (bkey_extent_is_unwritten(k1) != bkey_extent_is_unwritten(k2))
+			return false;
+
 		bkey_for_each_ptr_decode(k1.k, ptrs1, p1, entry1)
 			bkey_for_each_ptr_decode(k2.k, ptrs2, p2, entry2)
 				if (p1.ptr.dev == p2.ptr.dev &&
@@ -981,10 +992,12 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
 			u32 offset;
 			u64 b = sector_to_bucket_and_offset(ca, ptr->offset, &offset);
 
-			prt_printf(out, "ptr: %u:%llu:%u gen %u%s", ptr->dev,
-				   b, offset, ptr->gen,
-				   ptr->cached ? " cached" : "");
+			prt_printf(out, "ptr: %u:%llu:%u gen %u",
+				   ptr->dev, b, offset, ptr->gen);
+			if (ptr->cached)
+				prt_str(out, " cached");
+			if (ptr->unwritten)
+				prt_str(out, " unwritten");
 			if (ca && ptr_stale(ca, ptr))
 				prt_printf(out, " stale");
 		}
@@ -1073,6 +1086,7 @@ int bch2_bkey_ptrs_invalid(const struct bch_fs *c, struct bkey_s_c k,
 	unsigned size_ondisk = k.k->size;
 	unsigned nonce = UINT_MAX;
 	unsigned nr_ptrs = 0;
+	bool unwritten = false;
 	int ret;
 
 	if (bkey_is_btree_ptr(k.k))
@@ -1097,6 +1111,18 @@ int bch2_bkey_ptrs_invalid(const struct bch_fs *c, struct bkey_s_c k,
 					       false, err);
 			if (ret)
 				return ret;
+
+			if (nr_ptrs && unwritten != entry->ptr.unwritten) {
+				prt_printf(err, "extent with unwritten and written ptrs");
+				return -BCH_ERR_invalid_bkey;
+			}
+
+			if (k.k->type != KEY_TYPE_extent && entry->ptr.unwritten) {
+				prt_printf(err, "has unwritten ptrs");
+				return -BCH_ERR_invalid_bkey;
+			}
+
+			unwritten = entry->ptr.unwritten;
 			nr_ptrs++;
 			break;
 		case BCH_EXTENT_ENTRY_crc32:
...
@@ -510,6 +510,23 @@ static inline bool bkey_extent_is_allocation(const struct bkey *k)
 	}
 }
 
+static inline bool bkey_extent_is_unwritten(struct bkey_s_c k)
+{
+	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+	const struct bch_extent_ptr *ptr;
+
+	bkey_for_each_ptr(ptrs, ptr)
+		if (ptr->unwritten)
+			return true;
+	return false;
+}
+
+static inline bool bkey_extent_is_reservation(struct bkey_s_c k)
+{
+	return k.k->type == KEY_TYPE_reservation ||
+		bkey_extent_is_unwritten(k);
+}
+
 static inline struct bch_devs_list bch2_bkey_devs(struct bkey_s_c k)
 {
 	struct bch_devs_list ret = (struct bch_devs_list) { 0 };
...
@@ -341,11 +341,11 @@ static struct bch_page_state *bch2_page_state_create(struct page *page,
 	return bch2_page_state(page) ?: __bch2_page_state_create(page, gfp);
 }
 
-static unsigned bkey_to_sector_state(const struct bkey *k)
+static unsigned bkey_to_sector_state(struct bkey_s_c k)
 {
-	if (k->type == KEY_TYPE_reservation)
+	if (bkey_extent_is_reservation(k))
 		return SECTOR_RESERVED;
-	if (bkey_extent_is_allocation(k))
+	if (bkey_extent_is_allocation(k.k))
 		return SECTOR_ALLOCATED;
 	return SECTOR_UNALLOCATED;
 }
@@ -396,7 +396,7 @@ static int bch2_page_state_set(struct bch_fs *c, subvol_inum inum,
 			   SPOS(inum.inum, offset, snapshot),
 			   BTREE_ITER_SLOTS, k, ret) {
 		unsigned nr_ptrs = bch2_bkey_nr_ptrs_fully_allocated(k);
-		unsigned state = bkey_to_sector_state(k.k);
+		unsigned state = bkey_to_sector_state(k);
 
 		while (pg_idx < nr_pages) {
 			struct page *page = pages[pg_idx];
@@ -436,7 +436,7 @@ static void bch2_bio_page_state_set(struct bio *bio, struct bkey_s_c k)
 	struct bio_vec bv;
 	unsigned nr_ptrs = k.k->type == KEY_TYPE_reflink_v
 		? 0 : bch2_bkey_nr_ptrs_fully_allocated(k);
-	unsigned state = bkey_to_sector_state(k.k);
+	unsigned state = bkey_to_sector_state(k);
 
 	bio_for_each_segment(bv, bio, iter)
 		__bch2_page_state_set(bv.bv_page, bv.bv_offset >> 9,
@@ -3093,8 +3093,8 @@ static int __bchfs_fallocate(struct bch_inode_info *inode, int mode,
 			goto bkey_err;
 
 		/* already reserved */
-		if (k.k->type == KEY_TYPE_reservation &&
-		    bkey_s_c_to_reservation(k).v->nr_replicas >= opts.data_replicas) {
+		if (bkey_extent_is_reservation(k) &&
+		    bch2_bkey_nr_ptrs_fully_allocated(k) >= opts.data_replicas) {
 			bch2_btree_iter_advance(&iter);
 			continue;
 		}
...
@@ -811,6 +811,9 @@ static int bch2_fill_extent(struct bch_fs *c,
 			int flags2 = 0;
 			u64 offset = p.ptr.offset;
 
+			if (p.ptr.unwritten)
+				flags2 |= FIEMAP_EXTENT_UNWRITTEN;
+
 			if (p.crc.compression_type)
 				flags2 |= FIEMAP_EXTENT_ENCODED;
 			else
...
@@ -1251,8 +1251,8 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
 			continue;
 
 		if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_I_SIZE_DIRTY) &&
-				k.k->type != KEY_TYPE_reservation &&
-				k.k->p.offset > round_up(i->inode.bi_size, block_bytes(c)) >> 9, c,
+				k.k->p.offset > round_up(i->inode.bi_size, block_bytes(c)) >> 9 &&
+				!bkey_extent_is_reservation(k), c,
 				"extent type past end of inode %llu:%u, i_size %llu\n %s",
 				i->inode.bi_inum, i->snapshot, i->inode.bi_size,
 				(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
...
@@ -1481,6 +1481,9 @@ static inline bool should_promote(struct bch_fs *c, struct bkey_s_c k,
 	if (bch2_bkey_has_target(c, k, opts.promote_target))
 		return false;
 
+	if (bkey_extent_is_unwritten(k))
+		return false;
+
 	if (bch2_target_congested(c, opts.promote_target)) {
 		/* XXX trace this */
 		return false;
...
@@ -251,9 +251,13 @@ static struct bkey_s_c get_next_src(struct btree_iter *iter, struct bpos end)
 	struct bkey_s_c k;
 	int ret;
 
-	for_each_btree_key_upto_continue_norestart(*iter, end, 0, k, ret)
+	for_each_btree_key_upto_continue_norestart(*iter, end, 0, k, ret) {
+		if (bkey_extent_is_unwritten(k))
+			continue;
+
 		if (bkey_extent_is_data(k.k))
 			return k;
+	}
 
 	if (bkey_ge(iter->pos, end))
 		bch2_btree_iter_set_pos(iter, end);
...