Commit 4409b808 authored by Kent Overstreet's avatar Kent Overstreet

bcachefs: Repair pass for scanning for btree nodes

If a btree root or interior btree node goes bad, we're going to lose a
lot of data, unless we can recover the nodes that it pointed to by
scanning.

Fortunately btree node headers are fully self describing, and
additionally the magic number is xored with the filesytem UUID, so we
can do so safely.

This implements the scanning - next patch will rework topology repair to
make use of the found nodes.
Signed-off-by: default avatarKent Overstreet <kent.overstreet@linux.dev>
parent b268aa4e
......@@ -17,6 +17,7 @@ bcachefs-y := \
btree_journal_iter.o \
btree_key_cache.o \
btree_locking.o \
btree_node_scan.o \
btree_trans_commit.o \
btree_update.o \
btree_update_interior.o \
......
......@@ -456,6 +456,7 @@ enum bch_time_stats {
#include "alloc_types.h"
#include "btree_types.h"
#include "btree_node_scan_types.h"
#include "btree_write_buffer_types.h"
#include "buckets_types.h"
#include "buckets_waiting_for_journal_types.h"
......@@ -1103,6 +1104,8 @@ struct bch_fs {
struct journal_keys journal_keys;
struct list_head journal_iters;
struct find_btree_nodes found_btree_nodes;
u64 last_bucket_seq_cleanup;
u64 counters_on_mount[BCH_COUNTER_NR];
......
This diff is collapsed.
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_BTREE_NODE_SCAN_H
#define _BCACHEFS_BTREE_NODE_SCAN_H
int bch2_scan_for_btree_nodes(struct bch_fs *);
bool bch2_btree_node_is_stale(struct bch_fs *, struct btree *);
bool bch2_btree_has_scanned_nodes(struct bch_fs *, enum btree_id);
int bch2_get_scanned_nodes(struct bch_fs *, enum btree_id, unsigned, struct bpos, struct bpos);
void bch2_find_btree_nodes_exit(struct find_btree_nodes *);
#endif /* _BCACHEFS_BTREE_NODE_SCAN_H */
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_BTREE_NODE_SCAN_TYPES_H
#define _BCACHEFS_BTREE_NODE_SCAN_TYPES_H
#include "darray.h"
struct found_btree_node {
bool range_updated:1;
bool overwritten:1;
u8 btree_id;
u8 level;
u32 seq;
u64 cookie;
struct bpos min_key;
struct bpos max_key;
unsigned nr_ptrs;
struct bch_extent_ptr ptrs[BCH_REPLICAS_MAX];
};
typedef DARRAY(struct found_btree_node) found_btree_nodes;
struct find_btree_nodes {
int ret;
struct mutex lock;
found_btree_nodes nodes;
};
#endif /* _BCACHEFS_BTREE_NODE_SCAN_TYPES_H */
......@@ -978,6 +978,31 @@ bool bch2_extent_normalize(struct bch_fs *c, struct bkey_s k)
return bkey_deleted(k.k);
}
void bch2_extent_ptr_to_text(struct printbuf *out, struct bch_fs *c, const struct bch_extent_ptr *ptr)
{
struct bch_dev *ca = c && ptr->dev < c->sb.nr_devices && c->devs[ptr->dev]
? bch_dev_bkey_exists(c, ptr->dev)
: NULL;
if (!ca) {
prt_printf(out, "ptr: %u:%llu gen %u%s", ptr->dev,
(u64) ptr->offset, ptr->gen,
ptr->cached ? " cached" : "");
} else {
u32 offset;
u64 b = sector_to_bucket_and_offset(ca, ptr->offset, &offset);
prt_printf(out, "ptr: %u:%llu:%u gen %u",
ptr->dev, b, offset, ptr->gen);
if (ptr->cached)
prt_str(out, " cached");
if (ptr->unwritten)
prt_str(out, " unwritten");
if (ca && ptr_stale(ca, ptr))
prt_printf(out, " stale");
}
}
void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
struct bkey_s_c k)
{
......@@ -993,31 +1018,10 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
prt_printf(out, " ");
switch (__extent_entry_type(entry)) {
case BCH_EXTENT_ENTRY_ptr: {
const struct bch_extent_ptr *ptr = entry_to_ptr(entry);
struct bch_dev *ca = c && ptr->dev < c->sb.nr_devices && c->devs[ptr->dev]
? bch_dev_bkey_exists(c, ptr->dev)
: NULL;
if (!ca) {
prt_printf(out, "ptr: %u:%llu gen %u%s", ptr->dev,
(u64) ptr->offset, ptr->gen,
ptr->cached ? " cached" : "");
} else {
u32 offset;
u64 b = sector_to_bucket_and_offset(ca, ptr->offset, &offset);
prt_printf(out, "ptr: %u:%llu:%u gen %u",
ptr->dev, b, offset, ptr->gen);
if (ptr->cached)
prt_str(out, " cached");
if (ptr->unwritten)
prt_str(out, " unwritten");
if (ca && ptr_stale(ca, ptr))
prt_printf(out, " stale");
}
case BCH_EXTENT_ENTRY_ptr:
bch2_extent_ptr_to_text(out, c, entry_to_ptr(entry));
break;
}
case BCH_EXTENT_ENTRY_crc32:
case BCH_EXTENT_ENTRY_crc64:
case BCH_EXTENT_ENTRY_crc128: {
......
......@@ -676,6 +676,7 @@ bch2_extent_has_ptr(struct bkey_s_c, struct extent_ptr_decoded, struct bkey_s);
void bch2_extent_ptr_set_cached(struct bkey_s, struct bch_extent_ptr *);
bool bch2_extent_normalize(struct bch_fs *, struct bkey_s);
void bch2_extent_ptr_to_text(struct printbuf *out, struct bch_fs *, const struct bch_extent_ptr *);
void bch2_bkey_ptrs_to_text(struct printbuf *, struct bch_fs *,
struct bkey_s_c);
int bch2_bkey_ptrs_invalid(struct bch_fs *, struct bkey_s_c,
......
......@@ -368,11 +368,11 @@ enum fsck_err_opts {
OPT_STR_NOLIMIT(bch2_recovery_passes), \
BCH2_NO_SB_OPT, 0, \
NULL, "Exit recovery after specified pass") \
x(keep_journal, u8, \
x(retain_recovery_info, u8, \
0, \
OPT_BOOL(), \
BCH2_NO_SB_OPT, false, \
NULL, "Don't free journal entries/keys after startup")\
NULL, "Don't free journal entries/keys, scanned btree nodes after startup")\
x(read_entire_journal, u8, \
0, \
OPT_BOOL(), \
......
......@@ -4,6 +4,7 @@
#include "alloc_background.h"
#include "bkey_buf.h"
#include "btree_journal_iter.h"
#include "btree_node_scan.h"
#include "btree_update.h"
#include "btree_update_interior.h"
#include "btree_io.h"
......@@ -271,7 +272,7 @@ int bch2_journal_replay(struct bch_fs *c)
bch2_trans_put(trans);
trans = NULL;
if (!c->opts.keep_journal &&
if (!c->opts.retain_recovery_info &&
c->recovery_pass_done >= BCH_RECOVERY_PASS_journal_replay)
bch2_journal_keys_put_initial(c);
......@@ -435,10 +436,9 @@ static int journal_replay_early(struct bch_fs *c,
static int read_btree_roots(struct bch_fs *c)
{
unsigned i;
int ret = 0;
for (i = 0; i < btree_id_nr_alive(c); i++) {
for (unsigned i = 0; i < btree_id_nr_alive(c); i++) {
struct btree_root *r = bch2_btree_id_root(c, i);
if (!r->alive)
......@@ -447,33 +447,36 @@ static int read_btree_roots(struct bch_fs *c)
if (btree_id_is_alloc(i) && c->opts.reconstruct_alloc)
continue;
if (r->error) {
__fsck_err(c,
btree_id_is_alloc(i)
? FSCK_CAN_IGNORE : 0,
btree_root_bkey_invalid,
"invalid btree root %s",
bch2_btree_id_str(i));
if (i == BTREE_ID_alloc)
if (mustfix_fsck_err_on((ret = r->error),
c, btree_root_bkey_invalid,
"invalid btree root %s",
bch2_btree_id_str(i)) ||
mustfix_fsck_err_on((ret = r->error = bch2_btree_root_read(c, i, &r->key, r->level)),
c, btree_root_read_error,
"error reading btree root %s l=%u: %s",
bch2_btree_id_str(i), r->level, bch2_err_str(ret))) {
if (btree_id_is_alloc(i)) {
c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_allocations);
c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_alloc_info);
c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_lrus);
c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_extents_to_backpointers);
c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_alloc_to_lru_refs);
c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info);
}
r->error = 0;
} else if (!(c->recovery_passes_explicit & BIT_ULL(BCH_RECOVERY_PASS_scan_for_btree_nodes))) {
bch_info(c, "will run btree node scan");
c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_scan_for_btree_nodes);
c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_topology);
}
ret = bch2_btree_root_read(c, i, &r->key, r->level);
if (ret) {
fsck_err(c,
btree_root_read_error,
"error reading btree root %s",
bch2_btree_id_str(i));
if (btree_id_is_alloc(i))
c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info);
ret = 0;
}
}
for (i = 0; i < BTREE_ID_NR; i++) {
for (unsigned i = 0; i < BTREE_ID_NR; i++) {
struct btree_root *r = bch2_btree_id_root(c, i);
if (!r->b) {
if (!r->b && !r->error) {
r->alive = false;
r->level = 0;
bch2_btree_root_alloc_fake(c, i, 0);
......@@ -653,7 +656,7 @@ int bch2_fs_recovery(struct bch_fs *c)
goto err;
}
if (!c->sb.clean || c->opts.fsck || c->opts.keep_journal) {
if (!c->sb.clean || c->opts.fsck || c->opts.retain_recovery_info) {
struct genradix_iter iter;
struct journal_replay **i;
......@@ -883,9 +886,10 @@ int bch2_fs_recovery(struct bch_fs *c)
out:
bch2_flush_fsck_errs(c);
if (!c->opts.keep_journal &&
test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags))
if (!c->opts.retain_recovery_info) {
bch2_journal_keys_put_initial(c);
bch2_find_btree_nodes_exit(&c->found_btree_nodes);
}
kfree(clean);
if (!ret &&
......
......@@ -4,6 +4,7 @@
#include "alloc_background.h"
#include "backpointers.h"
#include "btree_gc.h"
#include "btree_node_scan.h"
#include "ec.h"
#include "fsck.h"
#include "inode.h"
......
......@@ -13,6 +13,7 @@
* must never change:
*/
#define BCH_RECOVERY_PASSES() \
x(scan_for_btree_nodes, 37, 0) \
x(check_topology, 4, 0) \
x(alloc_read, 0, PASS_ALWAYS) \
x(stripes_read, 1, PASS_ALWAYS) \
......
......@@ -15,6 +15,7 @@
#include "btree_gc.h"
#include "btree_journal_iter.h"
#include "btree_key_cache.h"
#include "btree_node_scan.h"
#include "btree_update_interior.h"
#include "btree_io.h"
#include "btree_write_buffer.h"
......@@ -536,6 +537,7 @@ static void __bch2_fs_free(struct bch_fs *c)
for (i = 0; i < BCH_TIME_STAT_NR; i++)
bch2_time_stats_exit(&c->times[i]);
bch2_find_btree_nodes_exit(&c->found_btree_nodes);
bch2_free_pending_node_rewrites(c);
bch2_fs_sb_errors_exit(c);
bch2_fs_counters_exit(c);
......@@ -560,6 +562,7 @@ static void __bch2_fs_free(struct bch_fs *c)
bch2_io_clock_exit(&c->io_clock[READ]);
bch2_fs_compress_exit(c);
bch2_journal_keys_put_initial(c);
bch2_find_btree_nodes_exit(&c->found_btree_nodes);
BUG_ON(atomic_read(&c->journal_keys.ref));
bch2_fs_btree_write_buffer_exit(c);
percpu_free_rwsem(&c->mark_lock);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment