Commit 5bd95a37 authored by Kent Overstreet's avatar Kent Overstreet Committed by Kent Overstreet

bcachefs: new avoid mechanism for io retries

Signed-off-by: default avatarKent Overstreet <kent.overstreet@linux.dev>
parent 47799326
...@@ -1345,11 +1345,9 @@ static void btree_node_read_work(struct work_struct *work) ...@@ -1345,11 +1345,9 @@ static void btree_node_read_work(struct work_struct *work)
struct bch_dev *ca = bch_dev_bkey_exists(c, rb->pick.ptr.dev); struct bch_dev *ca = bch_dev_bkey_exists(c, rb->pick.ptr.dev);
struct btree *b = rb->bio.bi_private; struct btree *b = rb->bio.bi_private;
struct bio *bio = &rb->bio; struct bio *bio = &rb->bio;
struct bch_devs_mask avoid; struct bch_io_failures failed = { .nr = 0 };
bool can_retry; bool can_retry;
memset(&avoid, 0, sizeof(avoid));
goto start; goto start;
while (1) { while (1) {
bch_info(c, "retrying read"); bch_info(c, "retrying read");
...@@ -1371,8 +1369,9 @@ static void btree_node_read_work(struct work_struct *work) ...@@ -1371,8 +1369,9 @@ static void btree_node_read_work(struct work_struct *work)
percpu_ref_put(&ca->io_ref); percpu_ref_put(&ca->io_ref);
rb->have_ioref = false; rb->have_ioref = false;
__set_bit(rb->pick.ptr.dev, avoid.d); bch2_mark_io_failure(&failed, &rb->pick);
can_retry = bch2_btree_pick_ptr(c, b, &avoid, &rb->pick) > 0;
can_retry = bch2_btree_pick_ptr(c, b, &failed, &rb->pick) > 0;
if (!bio->bi_status && if (!bio->bi_status &&
!bch2_btree_node_read_done(c, b, can_retry)) !bch2_btree_node_read_done(c, b, can_retry))
......
...@@ -519,12 +519,45 @@ static size_t extent_print_ptrs(struct bch_fs *c, char *buf, ...@@ -519,12 +519,45 @@ static size_t extent_print_ptrs(struct bch_fs *c, char *buf,
return out - buf; return out - buf;
} }
static inline bool dev_latency_better(struct bch_fs *c, static struct bch_dev_io_failures *dev_io_failures(struct bch_io_failures *f,
const struct bch_extent_ptr *ptr1, unsigned dev)
const struct bch_extent_ptr *ptr2)
{ {
struct bch_dev *dev1 = bch_dev_bkey_exists(c, ptr1->dev); struct bch_dev_io_failures *i;
struct bch_dev *dev2 = bch_dev_bkey_exists(c, ptr2->dev);
for (i = f->devs; i < f->devs + f->nr; i++)
if (i->dev == dev)
return i;
return NULL;
}
/*
 * Record an I/O failure against the device referenced by @p.
 *
 * Looks up the per-device entry for p->ptr.dev in @failed: if one exists,
 * its failure count is bumped; otherwise a fresh entry is appended (the
 * table must not already be full) with one failure and zero retries.
 */
void bch2_mark_io_failure(struct bch_io_failures *failed,
			  struct extent_ptr_decoded *p)
{
	struct bch_dev_io_failures *f = dev_io_failures(failed, p->ptr.dev);

	if (f) {
		f->nr_failed++;
		return;
	}

	/* no entry for this device yet — append one */
	BUG_ON(failed->nr >= ARRAY_SIZE(failed->devs));

	f = &failed->devs[failed->nr++];
	f->dev		= p->ptr.dev;
	f->nr_failed	= 1;
	f->nr_retries	= 0;
}
/*
* returns true if p1 is better than p2:
*/
static inline bool ptr_better(struct bch_fs *c,
const struct extent_ptr_decoded p1,
const struct extent_ptr_decoded p2)
{
struct bch_dev *dev1 = bch_dev_bkey_exists(c, p1.ptr.dev);
struct bch_dev *dev2 = bch_dev_bkey_exists(c, p2.ptr.dev);
u64 l1 = atomic64_read(&dev1->cur_latency[READ]); u64 l1 = atomic64_read(&dev1->cur_latency[READ]);
u64 l2 = atomic64_read(&dev2->cur_latency[READ]); u64 l2 = atomic64_read(&dev2->cur_latency[READ]);
...@@ -535,11 +568,12 @@ static inline bool dev_latency_better(struct bch_fs *c, ...@@ -535,11 +568,12 @@ static inline bool dev_latency_better(struct bch_fs *c,
static int extent_pick_read_device(struct bch_fs *c, static int extent_pick_read_device(struct bch_fs *c,
struct bkey_s_c_extent e, struct bkey_s_c_extent e,
struct bch_devs_mask *avoid, struct bch_io_failures *failed,
struct extent_ptr_decoded *pick) struct extent_ptr_decoded *pick)
{ {
const union bch_extent_entry *entry; const union bch_extent_entry *entry;
struct extent_ptr_decoded p; struct extent_ptr_decoded p;
struct bch_dev_io_failures *f;
struct bch_dev *ca; struct bch_dev *ca;
int ret = 0; int ret = 0;
...@@ -549,14 +583,11 @@ static int extent_pick_read_device(struct bch_fs *c, ...@@ -549,14 +583,11 @@ static int extent_pick_read_device(struct bch_fs *c,
if (p.ptr.cached && ptr_stale(ca, &p.ptr)) if (p.ptr.cached && ptr_stale(ca, &p.ptr))
continue; continue;
/* f = failed ? dev_io_failures(failed, p.ptr.dev) : NULL;
* XXX: need to make avoid work correctly for stripe ptrs if (f && f->nr_failed >= f->nr_retries)
*/
if (avoid && test_bit(p.ptr.dev, avoid->d))
continue; continue;
if (ret && !dev_latency_better(c, &p.ptr, &pick->ptr)) if (ret && !ptr_better(c, p, *pick))
continue; continue;
*pick = p; *pick = p;
...@@ -685,11 +716,11 @@ int bch2_btree_ptr_to_text(struct bch_fs *c, char *buf, ...@@ -685,11 +716,11 @@ int bch2_btree_ptr_to_text(struct bch_fs *c, char *buf,
} }
int bch2_btree_pick_ptr(struct bch_fs *c, const struct btree *b, int bch2_btree_pick_ptr(struct bch_fs *c, const struct btree *b,
struct bch_devs_mask *avoid, struct bch_io_failures *failed,
struct extent_ptr_decoded *pick) struct extent_ptr_decoded *pick)
{ {
return extent_pick_read_device(c, bkey_i_to_s_c_extent(&b->key), return extent_pick_read_device(c, bkey_i_to_s_c_extent(&b->key),
avoid, pick); failed, pick);
} }
/* Extents */ /* Extents */
...@@ -1909,7 +1940,7 @@ void bch2_extent_mark_replicas_cached(struct bch_fs *c, ...@@ -1909,7 +1940,7 @@ void bch2_extent_mark_replicas_cached(struct bch_fs *c,
* other devices, it will still pick a pointer from avoid. * other devices, it will still pick a pointer from avoid.
*/ */
int bch2_extent_pick_ptr(struct bch_fs *c, struct bkey_s_c k, int bch2_extent_pick_ptr(struct bch_fs *c, struct bkey_s_c k,
struct bch_devs_mask *avoid, struct bch_io_failures *failed,
struct extent_ptr_decoded *pick) struct extent_ptr_decoded *pick)
{ {
int ret; int ret;
...@@ -1921,7 +1952,7 @@ int bch2_extent_pick_ptr(struct bch_fs *c, struct bkey_s_c k, ...@@ -1921,7 +1952,7 @@ int bch2_extent_pick_ptr(struct bch_fs *c, struct bkey_s_c k,
case BCH_EXTENT: case BCH_EXTENT:
case BCH_EXTENT_CACHED: case BCH_EXTENT_CACHED:
ret = extent_pick_read_device(c, bkey_s_c_to_extent(k), ret = extent_pick_read_device(c, bkey_s_c_to_extent(k),
avoid, pick); failed, pick);
if (!ret && !bkey_extent_is_cached(k.k)) if (!ret && !bkey_extent_is_cached(k.k))
ret = -EIO; ret = -EIO;
......
...@@ -53,12 +53,13 @@ struct btree_nr_keys bch2_extent_sort_fix_overlapping(struct bch_fs *c, ...@@ -53,12 +53,13 @@ struct btree_nr_keys bch2_extent_sort_fix_overlapping(struct bch_fs *c,
struct btree *, struct btree *,
struct btree_node_iter_large *); struct btree_node_iter_large *);
void bch2_mark_io_failure(struct bch_io_failures *,
struct extent_ptr_decoded *);
int bch2_btree_pick_ptr(struct bch_fs *, const struct btree *, int bch2_btree_pick_ptr(struct bch_fs *, const struct btree *,
struct bch_devs_mask *avoid, struct bch_io_failures *,
struct extent_ptr_decoded *); struct extent_ptr_decoded *);
int bch2_extent_pick_ptr(struct bch_fs *, struct bkey_s_c, int bch2_extent_pick_ptr(struct bch_fs *, struct bkey_s_c,
struct bch_devs_mask *, struct bch_io_failures *,
struct extent_ptr_decoded *); struct extent_ptr_decoded *);
void bch2_extent_trim_atomic(struct bkey_i *, struct btree_iter *); void bch2_extent_trim_atomic(struct bkey_i *, struct btree_iter *);
......
...@@ -24,4 +24,13 @@ struct extent_ptr_decoded { ...@@ -24,4 +24,13 @@ struct extent_ptr_decoded {
struct bch_extent_ptr ptr; struct bch_extent_ptr ptr;
}; };
/*
 * Per-read table of devices that have failed, used to steer retries away
 * from bad devices (replaces the old bch_devs_mask "avoid" bitmap).
 */
struct bch_io_failures {
	u8 nr;				/* number of used entries in devs[] */
	struct bch_dev_io_failures {
		u8 dev;			/* device index this entry tracks */
		u8 nr_failed;		/* failures recorded on this device */
		u8 nr_retries;		/* retries allowed; pointer is skipped
					 * once nr_failed >= nr_retries */
	} devs[BCH_REPLICAS_MAX];
};
#endif /* _BCACHEFS_EXTENTS_TYPES_H */ #endif /* _BCACHEFS_EXTENTS_TYPES_H */
...@@ -1203,7 +1203,8 @@ static void bch2_rbio_done(struct bch_read_bio *rbio) ...@@ -1203,7 +1203,8 @@ static void bch2_rbio_done(struct bch_read_bio *rbio)
static void bch2_read_retry_nodecode(struct bch_fs *c, struct bch_read_bio *rbio, static void bch2_read_retry_nodecode(struct bch_fs *c, struct bch_read_bio *rbio,
struct bvec_iter bvec_iter, u64 inode, struct bvec_iter bvec_iter, u64 inode,
struct bch_devs_mask *avoid, unsigned flags) struct bch_io_failures *failed,
unsigned flags)
{ {
struct btree_iter iter; struct btree_iter iter;
BKEY_PADDED(k) tmp; BKEY_PADDED(k) tmp;
...@@ -1237,7 +1238,7 @@ static void bch2_read_retry_nodecode(struct bch_fs *c, struct bch_read_bio *rbio ...@@ -1237,7 +1238,7 @@ static void bch2_read_retry_nodecode(struct bch_fs *c, struct bch_read_bio *rbio
goto out; goto out;
} }
ret = __bch2_read_extent(c, rbio, bvec_iter, k, avoid, flags); ret = __bch2_read_extent(c, rbio, bvec_iter, k, failed, flags);
if (ret == READ_RETRY) if (ret == READ_RETRY)
goto retry; goto retry;
if (ret) if (ret)
...@@ -1251,7 +1252,7 @@ static void bch2_read_retry_nodecode(struct bch_fs *c, struct bch_read_bio *rbio ...@@ -1251,7 +1252,7 @@ static void bch2_read_retry_nodecode(struct bch_fs *c, struct bch_read_bio *rbio
static void bch2_read_retry(struct bch_fs *c, struct bch_read_bio *rbio, static void bch2_read_retry(struct bch_fs *c, struct bch_read_bio *rbio,
struct bvec_iter bvec_iter, u64 inode, struct bvec_iter bvec_iter, u64 inode,
struct bch_devs_mask *avoid, unsigned flags) struct bch_io_failures *failed, unsigned flags)
{ {
struct btree_iter iter; struct btree_iter iter;
struct bkey_s_c k; struct bkey_s_c k;
...@@ -1274,7 +1275,7 @@ static void bch2_read_retry(struct bch_fs *c, struct bch_read_bio *rbio, ...@@ -1274,7 +1275,7 @@ static void bch2_read_retry(struct bch_fs *c, struct bch_read_bio *rbio,
(k.k->p.offset - bvec_iter.bi_sector) << 9); (k.k->p.offset - bvec_iter.bi_sector) << 9);
swap(bvec_iter.bi_size, bytes); swap(bvec_iter.bi_size, bytes);
ret = __bch2_read_extent(c, rbio, bvec_iter, k, avoid, flags); ret = __bch2_read_extent(c, rbio, bvec_iter, k, failed, flags);
switch (ret) { switch (ret) {
case READ_RETRY: case READ_RETRY:
goto retry; goto retry;
...@@ -1310,14 +1311,12 @@ static void bch2_rbio_retry(struct work_struct *work) ...@@ -1310,14 +1311,12 @@ static void bch2_rbio_retry(struct work_struct *work)
struct bvec_iter iter = rbio->bvec_iter; struct bvec_iter iter = rbio->bvec_iter;
unsigned flags = rbio->flags; unsigned flags = rbio->flags;
u64 inode = rbio->pos.inode; u64 inode = rbio->pos.inode;
struct bch_devs_mask avoid; struct bch_io_failures failed = { .nr = 0 };
trace_read_retry(&rbio->bio); trace_read_retry(&rbio->bio);
memset(&avoid, 0, sizeof(avoid));
if (rbio->retry == READ_RETRY_AVOID) if (rbio->retry == READ_RETRY_AVOID)
__set_bit(rbio->pick.ptr.dev, avoid.d); bch2_mark_io_failure(&failed, &rbio->pick);
rbio->bio.bi_status = 0; rbio->bio.bi_status = 0;
...@@ -1327,9 +1326,9 @@ static void bch2_rbio_retry(struct work_struct *work) ...@@ -1327,9 +1326,9 @@ static void bch2_rbio_retry(struct work_struct *work)
flags &= ~BCH_READ_MAY_PROMOTE; flags &= ~BCH_READ_MAY_PROMOTE;
if (flags & BCH_READ_NODECODE) if (flags & BCH_READ_NODECODE)
bch2_read_retry_nodecode(c, rbio, iter, inode, &avoid, flags); bch2_read_retry_nodecode(c, rbio, iter, inode, &failed, flags);
else else
bch2_read_retry(c, rbio, iter, inode, &avoid, flags); bch2_read_retry(c, rbio, iter, inode, &failed, flags);
} }
static void bch2_rbio_error(struct bch_read_bio *rbio, int retry, static void bch2_rbio_error(struct bch_read_bio *rbio, int retry,
...@@ -1569,7 +1568,7 @@ static void bch2_read_endio(struct bio *bio) ...@@ -1569,7 +1568,7 @@ static void bch2_read_endio(struct bio *bio)
int __bch2_read_extent(struct bch_fs *c, struct bch_read_bio *orig, int __bch2_read_extent(struct bch_fs *c, struct bch_read_bio *orig,
struct bvec_iter iter, struct bkey_s_c k, struct bvec_iter iter, struct bkey_s_c k,
struct bch_devs_mask *avoid, unsigned flags) struct bch_io_failures *failed, unsigned flags)
{ {
struct extent_ptr_decoded pick; struct extent_ptr_decoded pick;
struct bch_read_bio *rbio = NULL; struct bch_read_bio *rbio = NULL;
...@@ -1579,7 +1578,7 @@ int __bch2_read_extent(struct bch_fs *c, struct bch_read_bio *orig, ...@@ -1579,7 +1578,7 @@ int __bch2_read_extent(struct bch_fs *c, struct bch_read_bio *orig,
struct bpos pos = bkey_start_pos(k.k); struct bpos pos = bkey_start_pos(k.k);
int pick_ret; int pick_ret;
pick_ret = bch2_extent_pick_ptr(c, k, avoid, &pick); pick_ret = bch2_extent_pick_ptr(c, k, failed, &pick);
/* hole or reservation - just zero fill: */ /* hole or reservation - just zero fill: */
if (!pick_ret) if (!pick_ret)
...@@ -1750,7 +1749,7 @@ int __bch2_read_extent(struct bch_fs *c, struct bch_read_bio *orig, ...@@ -1750,7 +1749,7 @@ int __bch2_read_extent(struct bch_fs *c, struct bch_read_bio *orig,
rbio = bch2_rbio_free(rbio); rbio = bch2_rbio_free(rbio);
if (ret == READ_RETRY_AVOID) { if (ret == READ_RETRY_AVOID) {
__set_bit(pick.ptr.dev, avoid->d); bch2_mark_io_failure(failed, &pick);
ret = READ_RETRY; ret = READ_RETRY;
} }
......
...@@ -102,7 +102,7 @@ struct cache_promote_op; ...@@ -102,7 +102,7 @@ struct cache_promote_op;
struct extent_ptr_decoded; struct extent_ptr_decoded;
int __bch2_read_extent(struct bch_fs *, struct bch_read_bio *, struct bvec_iter, int __bch2_read_extent(struct bch_fs *, struct bch_read_bio *, struct bvec_iter,
struct bkey_s_c, struct bch_devs_mask *, unsigned); struct bkey_s_c, struct bch_io_failures *, unsigned);
void bch2_read(struct bch_fs *, struct bch_read_bio *, u64); void bch2_read(struct bch_fs *, struct bch_read_bio *, u64);
enum bch_read_flags { enum bch_read_flags {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment