Commit 0fefe8d8 authored by Kent Overstreet, committed by Kent Overstreet

bcachefs: Improve some IO error messages

It's useful to know whether an error was for a read or a write; this
also standardizes error messages a bit more.
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent f299d573
...@@ -214,9 +214,11 @@ ...@@ -214,9 +214,11 @@
dynamic_fault("bcachefs:meta:write:" name) dynamic_fault("bcachefs:meta:write:" name)
#ifdef __KERNEL__ #ifdef __KERNEL__
#define bch2_fmt(_c, fmt) "bcachefs (%s): " fmt "\n", ((_c)->name) #define bch2_fmt(_c, fmt) "bcachefs (%s): " fmt "\n", ((_c)->name)
#define bch2_fmt_inum(_c, _inum, fmt) "bcachefs (%s inum %llu): " fmt "\n", ((_c)->name), (_inum)
#else #else
#define bch2_fmt(_c, fmt) fmt "\n" #define bch2_fmt(_c, fmt) fmt "\n"
#define bch2_fmt_inum(_c, _inum, fmt) "inum %llu: " fmt "\n", (_inum)
#endif #endif
#define bch_info(c, fmt, ...) \ #define bch_info(c, fmt, ...) \
...@@ -229,8 +231,11 @@ ...@@ -229,8 +231,11 @@
printk_ratelimited(KERN_WARNING bch2_fmt(c, fmt), ##__VA_ARGS__) printk_ratelimited(KERN_WARNING bch2_fmt(c, fmt), ##__VA_ARGS__)
#define bch_err(c, fmt, ...) \ #define bch_err(c, fmt, ...) \
printk(KERN_ERR bch2_fmt(c, fmt), ##__VA_ARGS__) printk(KERN_ERR bch2_fmt(c, fmt), ##__VA_ARGS__)
#define bch_err_ratelimited(c, fmt, ...) \ #define bch_err_ratelimited(c, fmt, ...) \
printk_ratelimited(KERN_ERR bch2_fmt(c, fmt), ##__VA_ARGS__) printk_ratelimited(KERN_ERR bch2_fmt(c, fmt), ##__VA_ARGS__)
#define bch_err_inum_ratelimited(c, _inum, fmt, ...) \
printk_ratelimited(KERN_ERR bch2_fmt_inum(c, _inum, fmt), ##__VA_ARGS__)
#define bch_verbose(c, fmt, ...) \ #define bch_verbose(c, fmt, ...) \
do { \ do { \
......
...@@ -597,18 +597,25 @@ void bch2_btree_init_next(struct bch_fs *c, struct btree *b, ...@@ -597,18 +597,25 @@ void bch2_btree_init_next(struct bch_fs *c, struct btree *b,
bch2_btree_iter_reinit_node(iter, b); bch2_btree_iter_reinit_node(iter, b);
} }
/*
 * Print a btree node's position for error messages: btree ID, the node's
 * level over the current root level for that btree, then the node's key.
 *
 * NOTE(review): the trailing "\n  " emitted after the level is a deliberate
 * continuation indent — callers append further text on the indented line, so
 * don't "clean up" the format string.
 */
static void btree_pos_to_text(struct printbuf *out, struct bch_fs *c,
			      struct btree *b)
{
	pr_buf(out, "%s level %u/%u\n  ",
	       bch2_btree_ids[b->c.btree_id],
	       b->c.level,
	       c->btree_roots[b->c.btree_id].level);
	bch2_bkey_val_to_text(out, c, bkey_i_to_s_c(&b->key));
}
static void btree_err_msg(struct printbuf *out, struct bch_fs *c, static void btree_err_msg(struct printbuf *out, struct bch_fs *c,
struct btree *b, struct bset *i, struct btree *b, struct bset *i,
unsigned offset, int write) unsigned offset, int write)
{ {
pr_buf(out, "error validating btree node %sat btree %u level %u/%u\n" pr_buf(out, "error validating btree node %sat btree ",
"pos ", write ? "before write " : "");
write ? "before write " : "", btree_pos_to_text(out, c, b);
b->c.btree_id, b->c.level,
c->btree_roots[b->c.btree_id].level);
bch2_bkey_val_to_text(out, c, bkey_i_to_s_c(&b->key));
pr_buf(out, " node offset %u", b->written); pr_buf(out, "\n node offset %u", b->written);
if (i) if (i)
pr_buf(out, " bset u64s %u", le16_to_cpu(i->u64s)); pr_buf(out, " bset u64s %u", le16_to_cpu(i->u64s));
} }
...@@ -1104,6 +1111,8 @@ static void btree_node_read_work(struct work_struct *work) ...@@ -1104,6 +1111,8 @@ static void btree_node_read_work(struct work_struct *work)
struct btree *b = rb->bio.bi_private; struct btree *b = rb->bio.bi_private;
struct bio *bio = &rb->bio; struct bio *bio = &rb->bio;
struct bch_io_failures failed = { .nr = 0 }; struct bch_io_failures failed = { .nr = 0 };
char buf[200];
struct printbuf out;
bool can_retry; bool can_retry;
goto start; goto start;
...@@ -1122,8 +1131,10 @@ static void btree_node_read_work(struct work_struct *work) ...@@ -1122,8 +1131,10 @@ static void btree_node_read_work(struct work_struct *work)
bio->bi_status = BLK_STS_REMOVED; bio->bi_status = BLK_STS_REMOVED;
} }
start: start:
bch2_dev_io_err_on(bio->bi_status, ca, "btree read: %s", out = PBUF(buf);
bch2_blk_status_to_str(bio->bi_status)); btree_pos_to_text(&out, c, b);
bch2_dev_io_err_on(bio->bi_status, ca, "btree read error %s for %s",
bch2_blk_status_to_str(bio->bi_status), buf);
if (rb->have_ioref) if (rb->have_ioref)
percpu_ref_put(&ca->io_ref); percpu_ref_put(&ca->io_ref);
rb->have_ioref = false; rb->have_ioref = false;
...@@ -1408,7 +1419,7 @@ static void btree_node_write_endio(struct bio *bio) ...@@ -1408,7 +1419,7 @@ static void btree_node_write_endio(struct bio *bio)
if (wbio->have_ioref) if (wbio->have_ioref)
bch2_latency_acct(ca, wbio->submit_time, WRITE); bch2_latency_acct(ca, wbio->submit_time, WRITE);
if (bch2_dev_io_err_on(bio->bi_status, ca, "btree write: %s", if (bch2_dev_io_err_on(bio->bi_status, ca, "btree write error: %s",
bch2_blk_status_to_str(bio->bi_status)) || bch2_blk_status_to_str(bio->bi_status)) ||
bch2_meta_write_fault("btree")) { bch2_meta_write_fault("btree")) {
spin_lock_irqsave(&c->btree_write_error_lock, flags); spin_lock_irqsave(&c->btree_write_error_lock, flags);
......
...@@ -264,7 +264,7 @@ static void ec_validate_checksums(struct bch_fs *c, struct ec_stripe_buf *buf) ...@@ -264,7 +264,7 @@ static void ec_validate_checksums(struct bch_fs *c, struct ec_stripe_buf *buf)
len << 9); len << 9);
if (memcmp(stripe_csum(v, i, j), &csum, csum_bytes)) { if (memcmp(stripe_csum(v, i, j), &csum, csum_bytes)) {
__bcache_io_error(c, bch_err_ratelimited(c,
"checksum error while doing reconstruct read (%u:%u)", "checksum error while doing reconstruct read (%u:%u)",
i, j); i, j);
clear_bit(i, buf->valid); clear_bit(i, buf->valid);
...@@ -305,7 +305,7 @@ static int ec_do_recov(struct bch_fs *c, struct ec_stripe_buf *buf) ...@@ -305,7 +305,7 @@ static int ec_do_recov(struct bch_fs *c, struct ec_stripe_buf *buf)
unsigned bytes = buf->size << 9; unsigned bytes = buf->size << 9;
if (ec_nr_failed(buf) > v->nr_redundant) { if (ec_nr_failed(buf) > v->nr_redundant) {
__bcache_io_error(c, bch_err_ratelimited(c,
"error doing reconstruct read: unable to read enough blocks"); "error doing reconstruct read: unable to read enough blocks");
return -1; return -1;
} }
...@@ -326,7 +326,7 @@ static void ec_block_endio(struct bio *bio) ...@@ -326,7 +326,7 @@ static void ec_block_endio(struct bio *bio)
struct bch_dev *ca = ec_bio->ca; struct bch_dev *ca = ec_bio->ca;
struct closure *cl = bio->bi_private; struct closure *cl = bio->bi_private;
if (bch2_dev_io_err_on(bio->bi_status, ca, "erasure coding %s: %s", if (bch2_dev_io_err_on(bio->bi_status, ca, "erasure coding %s error: %s",
bio_data_dir(bio) ? "write" : "read", bio_data_dir(bio) ? "write" : "read",
bch2_blk_status_to_str(bio->bi_status))) bch2_blk_status_to_str(bio->bi_status)))
clear_bit(ec_bio->idx, ec_bio->buf->valid); clear_bit(ec_bio->idx, ec_bio->buf->valid);
...@@ -420,7 +420,7 @@ int bch2_ec_read_extent(struct bch_fs *c, struct bch_read_bio *rbio) ...@@ -420,7 +420,7 @@ int bch2_ec_read_extent(struct bch_fs *c, struct bch_read_bio *rbio)
BTREE_ITER_SLOTS); BTREE_ITER_SLOTS);
k = bch2_btree_iter_peek_slot(iter); k = bch2_btree_iter_peek_slot(iter);
if (bkey_err(k) || k.k->type != KEY_TYPE_stripe) { if (bkey_err(k) || k.k->type != KEY_TYPE_stripe) {
__bcache_io_error(c, bch_err_ratelimited(c,
"error doing reconstruct read: stripe not found"); "error doing reconstruct read: stripe not found");
kfree(buf); kfree(buf);
return bch2_trans_exit(&trans) ?: -EIO; return bch2_trans_exit(&trans) ?: -EIO;
...@@ -462,7 +462,7 @@ int bch2_ec_read_extent(struct bch_fs *c, struct bch_read_bio *rbio) ...@@ -462,7 +462,7 @@ int bch2_ec_read_extent(struct bch_fs *c, struct bch_read_bio *rbio)
struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
if (ptr_stale(ca, ptr)) { if (ptr_stale(ca, ptr)) {
__bcache_io_error(c, bch_err_ratelimited(c,
"error doing reconstruct read: stale pointer"); "error doing reconstruct read: stale pointer");
clear_bit(i, buf->valid); clear_bit(i, buf->valid);
continue; continue;
...@@ -474,7 +474,7 @@ int bch2_ec_read_extent(struct bch_fs *c, struct bch_read_bio *rbio) ...@@ -474,7 +474,7 @@ int bch2_ec_read_extent(struct bch_fs *c, struct bch_read_bio *rbio)
closure_sync(&cl); closure_sync(&cl);
if (ec_nr_failed(buf) > v->nr_redundant) { if (ec_nr_failed(buf) > v->nr_redundant) {
__bcache_io_error(c, bch_err_ratelimited(c,
"error doing reconstruct read: unable to read enough blocks"); "error doing reconstruct read: unable to read enough blocks");
ret = -EIO; ret = -EIO;
goto err; goto err;
......
...@@ -181,12 +181,18 @@ void bch2_io_error(struct bch_dev *); ...@@ -181,12 +181,18 @@ void bch2_io_error(struct bch_dev *);
/* Logs message and handles the error: */ /* Logs message and handles the error: */
#define bch2_dev_io_error(ca, fmt, ...) \ #define bch2_dev_io_error(ca, fmt, ...) \
do { \ do { \
printk_ratelimited(KERN_ERR bch2_fmt((ca)->fs, \ printk_ratelimited(KERN_ERR "bcachefs (%s): " fmt, \
"IO error on %s for " fmt), \
(ca)->name, ##__VA_ARGS__); \ (ca)->name, ##__VA_ARGS__); \
bch2_io_error(ca); \ bch2_io_error(ca); \
} while (0) } while (0)
/*
 * Like bch2_dev_io_error(), but tags the (ratelimited) message with the inode
 * number and file offset the IO was for, then accounts the error against the
 * device via bch2_io_error().  _inum and _offset are expected to be u64
 * (printed with %llu — TODO confirm callers always pass 64-bit values).
 */
#define bch2_dev_inum_io_error(ca, _inum, _offset, fmt, ...)		\
do {									\
	printk_ratelimited(KERN_ERR "bcachefs (%s inum %llu offset %llu): " fmt,\
			   (ca)->name, (_inum), (_offset), ##__VA_ARGS__); \
	bch2_io_error(ca);						\
} while (0)
#define bch2_dev_io_err_on(cond, ca, ...) \ #define bch2_dev_io_err_on(cond, ca, ...) \
({ \ ({ \
bool _ret = (cond); \ bool _ret = (cond); \
...@@ -196,16 +202,13 @@ do { \ ...@@ -196,16 +202,13 @@ do { \
_ret; \ _ret; \
}) })
/* kill? */ #define bch2_dev_inum_io_err_on(cond, ca, _inum, _offset, ...) \
({ \
#define __bcache_io_error(c, fmt, ...) \ bool _ret = (cond); \
printk_ratelimited(KERN_ERR bch2_fmt(c, \ \
"IO error: " fmt), ##__VA_ARGS__) if (_ret) \
bch2_dev_inum_io_error(ca, _inum, _offset, __VA_ARGS__);\
#define bcache_io_error(c, bio, fmt, ...) \ _ret; \
do { \ })
__bcache_io_error(c, fmt, ##__VA_ARGS__); \
(bio)->bi_status = BLK_STS_IOERR; \
} while (0)
#endif /* _BCACHEFS_ERROR_H */ #endif /* _BCACHEFS_ERROR_H */
...@@ -839,7 +839,9 @@ static void bchfs_read(struct btree_trans *trans, struct btree_iter *iter, ...@@ -839,7 +839,9 @@ static void bchfs_read(struct btree_trans *trans, struct btree_iter *iter,
goto retry; goto retry;
if (ret) { if (ret) {
bcache_io_error(c, &rbio->bio, "btree IO error %i", ret); bch_err_inum_ratelimited(c, inum,
"read error %i from btree lookup", ret);
rbio->bio.bi_status = BLK_STS_IOERR;
bio_endio(&rbio->bio); bio_endio(&rbio->bio);
} }
......
...@@ -588,7 +588,8 @@ static void __bch2_write_index(struct bch_write_op *op) ...@@ -588,7 +588,8 @@ static void __bch2_write_index(struct bch_write_op *op)
op->written += sectors_start - keylist_sectors(keys); op->written += sectors_start - keylist_sectors(keys);
if (ret) { if (ret) {
__bcache_io_error(c, "btree IO error %i", ret); bch_err_inum_ratelimited(c, op->pos.inode,
"write error %i from btree update", ret);
op->error = ret; op->error = ret;
} }
} }
...@@ -633,7 +634,10 @@ static void bch2_write_endio(struct bio *bio) ...@@ -633,7 +634,10 @@ static void bch2_write_endio(struct bio *bio)
struct bch_fs *c = wbio->c; struct bch_fs *c = wbio->c;
struct bch_dev *ca = bch_dev_bkey_exists(c, wbio->dev); struct bch_dev *ca = bch_dev_bkey_exists(c, wbio->dev);
if (bch2_dev_io_err_on(bio->bi_status, ca, "data write: %s", if (bch2_dev_inum_io_err_on(bio->bi_status, ca,
op->pos.inode,
op->pos.offset - bio_sectors(bio), /* XXX definitely wrong */
"data write error: %s",
bch2_blk_status_to_str(bio->bi_status))) bch2_blk_status_to_str(bio->bi_status)))
set_bit(wbio->dev, op->failed.d); set_bit(wbio->dev, op->failed.d);
...@@ -1276,15 +1280,14 @@ void bch2_write(struct closure *cl) ...@@ -1276,15 +1280,14 @@ void bch2_write(struct closure *cl)
wbio_init(bio)->put_bio = false; wbio_init(bio)->put_bio = false;
if (bio_sectors(bio) & (c->opts.block_size - 1)) { if (bio_sectors(bio) & (c->opts.block_size - 1)) {
__bcache_io_error(c, "misaligned write"); bch_err_inum_ratelimited(c, op->pos.inode,
"misaligned write");
op->error = -EIO; op->error = -EIO;
goto err; goto err;
} }
if (c->opts.nochanges || if (c->opts.nochanges ||
!percpu_ref_tryget(&c->writes)) { !percpu_ref_tryget(&c->writes)) {
if (!(op->flags & BCH_WRITE_FROM_INTERNAL))
__bcache_io_error(c, "read only");
op->error = -EROFS; op->error = -EROFS;
goto err; goto err;
} }
...@@ -1707,7 +1710,8 @@ static void bch2_read_retry(struct bch_fs *c, struct bch_read_bio *rbio, ...@@ -1707,7 +1710,8 @@ static void bch2_read_retry(struct bch_fs *c, struct bch_read_bio *rbio,
* reading a btree node * reading a btree node
*/ */
BUG_ON(!ret); BUG_ON(!ret);
__bcache_io_error(c, "btree IO error: %i", ret); bch_err_inum_ratelimited(c, inode,
"read error %i from btree lookup", ret);
err: err:
rbio->bio.bi_status = BLK_STS_IOERR; rbio->bio.bi_status = BLK_STS_IOERR;
out: out:
...@@ -1911,17 +1915,15 @@ static void __bch2_read_endio(struct work_struct *work) ...@@ -1911,17 +1915,15 @@ static void __bch2_read_endio(struct work_struct *work)
return; return;
} }
bch2_dev_io_error(ca, bch2_dev_inum_io_error(ca, rbio->pos.inode, (u64) rbio->bvec_iter.bi_sector,
"data checksum error, inode %llu offset %llu: expected %0llx:%0llx got %0llx:%0llx (type %u)", "data checksum error: expected %0llx:%0llx got %0llx:%0llx (type %u)",
rbio->pos.inode, (u64) rbio->bvec_iter.bi_sector,
rbio->pick.crc.csum.hi, rbio->pick.crc.csum.lo, rbio->pick.crc.csum.hi, rbio->pick.crc.csum.lo,
csum.hi, csum.lo, crc.csum_type); csum.hi, csum.lo, crc.csum_type);
bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR); bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR);
return; return;
decompression_err: decompression_err:
__bcache_io_error(c, "decompression error, inode %llu offset %llu", bch_err_inum_ratelimited(c, rbio->pos.inode,
rbio->pos.inode, "decompression error");
(u64) rbio->bvec_iter.bi_sector);
bch2_rbio_error(rbio, READ_ERR, BLK_STS_IOERR); bch2_rbio_error(rbio, READ_ERR, BLK_STS_IOERR);
return; return;
} }
...@@ -1943,7 +1945,14 @@ static void bch2_read_endio(struct bio *bio) ...@@ -1943,7 +1945,14 @@ static void bch2_read_endio(struct bio *bio)
if (!rbio->split) if (!rbio->split)
rbio->bio.bi_end_io = rbio->end_io; rbio->bio.bi_end_io = rbio->end_io;
if (bch2_dev_io_err_on(bio->bi_status, ca, "data read; %s", /*
* XXX: rbio->pos is not what we want here when reading from indirect
* extents
*/
if (bch2_dev_inum_io_err_on(bio->bi_status, ca,
rbio->pos.inode,
rbio->pos.offset,
"data read error: %s",
bch2_blk_status_to_str(bio->bi_status))) { bch2_blk_status_to_str(bio->bi_status))) {
bch2_rbio_error(rbio, READ_RETRY_AVOID, bio->bi_status); bch2_rbio_error(rbio, READ_RETRY_AVOID, bio->bi_status);
return; return;
...@@ -1993,7 +2002,7 @@ int __bch2_read_indirect_extent(struct btree_trans *trans, ...@@ -1993,7 +2002,7 @@ int __bch2_read_indirect_extent(struct btree_trans *trans,
if (k.k->type != KEY_TYPE_reflink_v && if (k.k->type != KEY_TYPE_reflink_v &&
k.k->type != KEY_TYPE_indirect_inline_data) { k.k->type != KEY_TYPE_indirect_inline_data) {
__bcache_io_error(trans->c, bch_err_inum_ratelimited(trans->c, orig_k->k->k.p.inode,
"pointer to nonexistent indirect extent"); "pointer to nonexistent indirect extent");
ret = -EIO; ret = -EIO;
goto err; goto err;
...@@ -2038,7 +2047,8 @@ int __bch2_read_extent(struct bch_fs *c, struct bch_read_bio *orig, ...@@ -2038,7 +2047,8 @@ int __bch2_read_extent(struct bch_fs *c, struct bch_read_bio *orig,
goto hole; goto hole;
if (pick_ret < 0) { if (pick_ret < 0) {
__bcache_io_error(c, "no device to read from"); bch_err_inum_ratelimited(c, k.k->p.inode,
"no device to read from");
goto err; goto err;
} }
...@@ -2190,7 +2200,8 @@ int __bch2_read_extent(struct bch_fs *c, struct bch_read_bio *orig, ...@@ -2190,7 +2200,8 @@ int __bch2_read_extent(struct bch_fs *c, struct bch_read_bio *orig,
if (!rbio->pick.idx) { if (!rbio->pick.idx) {
if (!rbio->have_ioref) { if (!rbio->have_ioref) {
__bcache_io_error(c, "no device to read from"); bch_err_inum_ratelimited(c, k.k->p.inode,
"no device to read from");
bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR); bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR);
goto out; goto out;
} }
...@@ -2345,7 +2356,9 @@ void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, u64 inode) ...@@ -2345,7 +2356,9 @@ void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, u64 inode)
if (ret == -EINTR) if (ret == -EINTR)
goto retry; goto retry;
bcache_io_error(c, &rbio->bio, "btree IO error: %i", ret); bch_err_inum_ratelimited(c, inode,
"read error %i from btree lookup", ret);
rbio->bio.bi_status = BLK_STS_IOERR;
bch2_rbio_done(rbio); bch2_rbio_done(rbio);
goto out; goto out;
} }
......
...@@ -557,7 +557,7 @@ static int journal_read_bucket(struct bch_dev *ca, ...@@ -557,7 +557,7 @@ static int journal_read_bucket(struct bch_dev *ca,
kfree(bio); kfree(bio);
if (bch2_dev_io_err_on(ret, ca, if (bch2_dev_io_err_on(ret, ca,
"journal read from sector %llu", "journal read error: sector %llu",
offset) || offset) ||
bch2_meta_read_fault("journal")) bch2_meta_read_fault("journal"))
return -EIO; return -EIO;
...@@ -1015,7 +1015,7 @@ static void journal_write_endio(struct bio *bio) ...@@ -1015,7 +1015,7 @@ static void journal_write_endio(struct bio *bio)
struct bch_dev *ca = bio->bi_private; struct bch_dev *ca = bio->bi_private;
struct journal *j = &ca->fs->journal; struct journal *j = &ca->fs->journal;
if (bch2_dev_io_err_on(bio->bi_status, ca, "journal write: %s", if (bch2_dev_io_err_on(bio->bi_status, ca, "journal write error: %s",
bch2_blk_status_to_str(bio->bi_status)) || bch2_blk_status_to_str(bio->bi_status)) ||
bch2_meta_write_fault("journal")) { bch2_meta_write_fault("journal")) {
struct journal_buf *w = journal_prev_buf(j); struct journal_buf *w = journal_prev_buf(j);
......
...@@ -640,7 +640,7 @@ static void write_super_endio(struct bio *bio) ...@@ -640,7 +640,7 @@ static void write_super_endio(struct bio *bio)
/* XXX: return errors directly */ /* XXX: return errors directly */
if (bch2_dev_io_err_on(bio->bi_status, ca, "superblock write: %s", if (bch2_dev_io_err_on(bio->bi_status, ca, "superblock write error: %s",
bch2_blk_status_to_str(bio->bi_status))) bch2_blk_status_to_str(bio->bi_status)))
ca->sb_write_error = 1; ca->sb_write_error = 1;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment.