Commit a2cb8a62 authored by Kent Overstreet

bcachefs: Self healing on read IO error

This repurposes the promote path, which already knows how to call
data_update() after a read: we now automatically rewrite bad data when
we get a read error and then successfully retry from a different
replica.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent b1d63b06
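
The heart of the change is the rewrite-pointer selection added to __promote_alloc() in the diff below: when the promote machinery is invoked with a non-NULL `failed`, it targets foreground_target and marks every extent pointer whose device recorded a read failure, so the data update path rewrites exactly those replicas. As a standalone sketch (not part of the commit; pick_rewrite_ptrs() is a hypothetical helper name, the commit does this inline while filling data_update_opts), the selection amounts to:

/*
 * Illustrative sketch only: build the rewrite_ptrs bitmask for a
 * self-heal rewrite. Every pointer whose device appears in the
 * bch_io_failures list recorded during the read gets its bit set.
 */
static unsigned pick_rewrite_ptrs(struct bkey_s_c k,
				  struct bch_io_failures *failed)
{
	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
	unsigned rewrite_ptrs = 0, i = 0;

	bkey_for_each_ptr(ptrs, ptr) {
		if (bch2_dev_io_failures(failed, ptr->dev))
			rewrite_ptrs |= BIT(i);
		i++;
	}

	return rewrite_ptrs;
}
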
@@ -37,8 +37,8 @@ static void bch2_extent_crc_pack(union bch_extent_crc *,
 				 struct bch_extent_crc_unpacked,
 				 enum bch_extent_entry_type);
 
-static struct bch_dev_io_failures *dev_io_failures(struct bch_io_failures *f,
-						   unsigned dev)
+struct bch_dev_io_failures *bch2_dev_io_failures(struct bch_io_failures *f,
+						 unsigned dev)
 {
 	struct bch_dev_io_failures *i;
@@ -52,7 +52,7 @@ static struct bch_dev_io_failures *dev_io_failures(struct bch_io_failures *f,
 void bch2_mark_io_failure(struct bch_io_failures *failed,
 			  struct extent_ptr_decoded *p)
 {
-	struct bch_dev_io_failures *f = dev_io_failures(failed, p->ptr.dev);
+	struct bch_dev_io_failures *f = bch2_dev_io_failures(failed, p->ptr.dev);
 
 	if (!f) {
 		BUG_ON(failed->nr >= ARRAY_SIZE(failed->devs));
@@ -140,7 +140,7 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k,
 		if (p.ptr.cached && (!ca || dev_ptr_stale_rcu(ca, &p.ptr)))
 			continue;
 
-		f = failed ? dev_io_failures(failed, p.ptr.dev) : NULL;
+		f = failed ? bch2_dev_io_failures(failed, p.ptr.dev) : NULL;
 		if (f)
 			p.idx = f->nr_failed < f->nr_retries
 				? f->idx
@@ -399,6 +399,8 @@ out: \
 
 /* utility code common to all keys with pointers: */
 
+struct bch_dev_io_failures *bch2_dev_io_failures(struct bch_io_failures *,
+						 unsigned);
 void bch2_mark_io_failure(struct bch_io_failures *,
 			  struct extent_ptr_decoded *);
 int bch2_bkey_pick_read_device(struct bch_fs *, struct bkey_s_c,
@@ -93,21 +93,24 @@ static const struct rhashtable_params bch_promote_params = {
 static inline int should_promote(struct bch_fs *c, struct bkey_s_c k,
 				 struct bpos pos,
 				 struct bch_io_opts opts,
-				 unsigned flags)
+				 unsigned flags,
+				 struct bch_io_failures *failed)
 {
-	BUG_ON(!opts.promote_target);
+	if (!failed) {
+		BUG_ON(!opts.promote_target);
 
-	if (!(flags & BCH_READ_MAY_PROMOTE))
-		return -BCH_ERR_nopromote_may_not;
+		if (!(flags & BCH_READ_MAY_PROMOTE))
+			return -BCH_ERR_nopromote_may_not;
 
-	if (bch2_bkey_has_target(c, k, opts.promote_target))
-		return -BCH_ERR_nopromote_already_promoted;
+		if (bch2_bkey_has_target(c, k, opts.promote_target))
+			return -BCH_ERR_nopromote_already_promoted;
 
-	if (bkey_extent_is_unwritten(k))
-		return -BCH_ERR_nopromote_unwritten;
+		if (bkey_extent_is_unwritten(k))
+			return -BCH_ERR_nopromote_unwritten;
 
-	if (bch2_target_congested(c, opts.promote_target))
-		return -BCH_ERR_nopromote_congested;
+		if (bch2_target_congested(c, opts.promote_target))
+			return -BCH_ERR_nopromote_congested;
+	}
 
 	if (rhashtable_lookup_fast(&c->promote_table, &pos,
 				   bch_promote_params))
@@ -164,7 +167,8 @@ static struct promote_op *__promote_alloc(struct btree_trans *trans,
 					  struct extent_ptr_decoded *pick,
 					  struct bch_io_opts opts,
 					  unsigned sectors,
-					  struct bch_read_bio **rbio)
+					  struct bch_read_bio **rbio,
+					  struct bch_io_failures *failed)
 {
 	struct bch_fs *c = trans->c;
 	struct promote_op *op = NULL;
@@ -217,14 +221,28 @@ static struct promote_op *__promote_alloc(struct btree_trans *trans,
 	bio = &op->write.op.wbio.bio;
 	bio_init(bio, NULL, bio->bi_inline_vecs, pages, 0);
 
+	struct data_update_opts update_opts = {};
+
+	if (!failed) {
+		update_opts.target = opts.promote_target;
+		update_opts.extra_replicas = 1;
+		update_opts.write_flags = BCH_WRITE_ALLOC_NOWAIT|BCH_WRITE_CACHED;
+	} else {
+		update_opts.target = opts.foreground_target;
+
+		struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+		unsigned i = 0;
+		bkey_for_each_ptr(ptrs, ptr) {
+			if (bch2_dev_io_failures(failed, ptr->dev))
+				update_opts.rewrite_ptrs |= BIT(i);
+			i++;
+		}
+	}
+
 	ret = bch2_data_update_init(trans, NULL, NULL, &op->write,
 			writepoint_hashed((unsigned long) current),
 			opts,
-			(struct data_update_opts) {
-				.target		= opts.promote_target,
-				.extra_replicas	= 1,
-				.write_flags	= BCH_WRITE_ALLOC_NOWAIT|BCH_WRITE_CACHED,
-			},
+			update_opts,
 			btree_id, k);
 	/*
 	 * possible errors: -BCH_ERR_nocow_lock_blocked,
@@ -258,10 +276,17 @@ static struct promote_op *promote_alloc(struct btree_trans *trans,
 					unsigned flags,
 					struct bch_read_bio **rbio,
 					bool *bounce,
-					bool *read_full)
+					bool *read_full,
+					struct bch_io_failures *failed)
 {
 	struct bch_fs *c = trans->c;
-	bool promote_full = *read_full || READ_ONCE(c->promote_whole_extents);
+	/*
+	 * if failed != NULL we're not actually doing a promote, we're
+	 * recovering from an io/checksum error
+	 */
+	bool promote_full = (failed ||
+			     *read_full ||
+			     READ_ONCE(c->promote_whole_extents));
 	/* data might have to be decompressed in the write path: */
 	unsigned sectors = promote_full
 		? max(pick->crc.compressed_size, pick->crc.live_size)
@@ -272,7 +297,7 @@ static struct promote_op *promote_alloc(struct btree_trans *trans,
 	struct promote_op *promote;
 	int ret;
 
-	ret = should_promote(c, k, pos, opts, flags);
+	ret = should_promote(c, k, pos, opts, flags, failed);
 	if (ret)
 		goto nopromote;
@@ -280,7 +305,7 @@ static struct promote_op *promote_alloc(struct btree_trans *trans,
 			    k.k->type == KEY_TYPE_reflink_v
 			    ? BTREE_ID_reflink
 			    : BTREE_ID_extents,
-			    k, pos, pick, opts, sectors, rbio);
+			    k, pos, pick, opts, sectors, rbio, failed);
 	ret = PTR_ERR_OR_ZERO(promote);
 	if (ret)
 		goto nopromote;
@@ -910,9 +935,9 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig,
 		bounce = true;
 	}
 
-	if (orig->opts.promote_target)
+	if (orig->opts.promote_target)// || failed)
 		promote = promote_alloc(trans, iter, k, &pick, orig->opts, flags,
-					&rbio, &bounce, &read_full);
+					&rbio, &bounce, &read_full, failed);
 
 	if (!read_full) {
 		EBUG_ON(crc_is_compressed(pick.crc));