Commit 19773ec9 authored by Kent Overstreet's avatar Kent Overstreet

bcachefs: Disk accounting device validation fixes

- Fix failure to validate that accounting replicas entries point to
  valid devices: this wasn't a real bug since they'd be cleaned up by
  GC, but is still something we should know about

- Fix failure to validate that dev_data_type entries point to valid
  devices: this does fix a real bug, since bch2_accounting_read() would
  then try to copy the counters to that device and pop an inconsistent
  error when the device didn't exist

- Remove accounting entries that are zeroed or invalid: if we're not
  validating them we need to get rid of them: they might not exist in
  the superblock, so we need the to trigger the superblock mark path
  when they're readded.

  This fixes the replication.ktest rereplicate test, which was failing
  with "superblock not marked for replicas..."
Signed-off-by: default avatarKent Overstreet <kent.overstreet@linux.dev>
parent 9d861787
......@@ -242,6 +242,14 @@ void bch2_accounting_swab(struct bkey_s k)
*p = swab64(*p);
}
static inline void __accounting_to_replicas(struct bch_replicas_entry_v1 *r,
struct disk_accounting_pos acc)
{
unsafe_memcpy(r, &acc.replicas,
replicas_entry_bytes(&acc.replicas),
"variable length struct");
}
static inline bool accounting_to_replicas(struct bch_replicas_entry_v1 *r, struct bpos p)
{
struct disk_accounting_pos acc_k;
......@@ -249,9 +257,7 @@ static inline bool accounting_to_replicas(struct bch_replicas_entry_v1 *r, struc
switch (acc_k.type) {
case BCH_DISK_ACCOUNTING_replicas:
unsafe_memcpy(r, &acc_k.replicas,
replicas_entry_bytes(&acc_k.replicas),
"variable length struct");
__accounting_to_replicas(r, acc_k);
return true;
default:
return false;
......@@ -608,6 +614,81 @@ static int accounting_read_key(struct btree_trans *trans, struct bkey_s_c k)
return ret;
}
static int bch2_disk_accounting_validate_late(struct btree_trans *trans,
struct disk_accounting_pos acc,
u64 *v, unsigned nr)
{
struct bch_fs *c = trans->c;
struct printbuf buf = PRINTBUF;
int ret = 0, invalid_dev = -1;
switch (acc.type) {
case BCH_DISK_ACCOUNTING_replicas: {
struct bch_replicas_padded r;
__accounting_to_replicas(&r.e, acc);
for (unsigned i = 0; i < r.e.nr_devs; i++)
if (r.e.devs[i] != BCH_SB_MEMBER_INVALID &&
!bch2_dev_exists(c, r.e.devs[i])) {
invalid_dev = r.e.devs[i];
goto invalid_device;
}
/*
* All replicas entry checks except for invalid device are done
* in bch2_accounting_validate
*/
BUG_ON(bch2_replicas_entry_validate(&r.e, c, &buf));
if (fsck_err_on(!bch2_replicas_marked_locked(c, &r.e),
trans, accounting_replicas_not_marked,
"accounting not marked in superblock replicas\n %s",
(printbuf_reset(&buf),
bch2_accounting_key_to_text(&buf, &acc),
buf.buf))) {
/*
* We're not RW yet and still single threaded, dropping
* and retaking lock is ok:
*/
percpu_up_write(&c->mark_lock);
ret = bch2_mark_replicas(c, &r.e);
if (ret)
goto fsck_err;
percpu_down_write(&c->mark_lock);
}
break;
}
case BCH_DISK_ACCOUNTING_dev_data_type:
if (!bch2_dev_exists(c, acc.dev_data_type.dev)) {
invalid_dev = acc.dev_data_type.dev;
goto invalid_device;
}
break;
}
fsck_err:
printbuf_exit(&buf);
return ret;
invalid_device:
if (fsck_err(trans, accounting_to_invalid_device,
"accounting entry points to invalid device %i\n %s",
invalid_dev,
(printbuf_reset(&buf),
bch2_accounting_key_to_text(&buf, &acc),
buf.buf))) {
for (unsigned i = 0; i < nr; i++)
v[i] = -v[i];
ret = commit_do(trans, NULL, NULL, 0,
bch2_disk_accounting_mod(trans, &acc, v, nr, false)) ?:
-BCH_ERR_remove_disk_accounting_entry;
} else {
ret = -BCH_ERR_remove_disk_accounting_entry;
}
goto fsck_err;
}
/*
* At startup time, initialize the in memory accounting from the btree (and
* journal)
......@@ -666,44 +747,42 @@ int bch2_accounting_read(struct bch_fs *c)
}
keys->gap = keys->nr = dst - keys->data;
percpu_down_read(&c->mark_lock);
for (unsigned i = 0; i < acc->k.nr; i++) {
u64 v[BCH_ACCOUNTING_MAX_COUNTERS];
bch2_accounting_mem_read_counters(acc, i, v, ARRAY_SIZE(v), false);
percpu_down_write(&c->mark_lock);
unsigned i = 0;
while (i < acc->k.nr) {
unsigned idx = inorder_to_eytzinger0(i, acc->k.nr);
if (bch2_is_zero(v, sizeof(v[0]) * acc->k.data[i].nr_counters))
continue;
struct disk_accounting_pos acc_k;
bpos_to_disk_accounting_pos(&acc_k, acc->k.data[idx].pos);
struct bch_replicas_padded r;
if (!accounting_to_replicas(&r.e, acc->k.data[i].pos))
continue;
u64 v[BCH_ACCOUNTING_MAX_COUNTERS];
bch2_accounting_mem_read_counters(acc, idx, v, ARRAY_SIZE(v), false);
/*
* If the replicas entry is invalid it'll get cleaned up by
* check_allocations:
* If the entry counters are zeroed, it should be treated as
* nonexistent - it might point to an invalid device.
*
* Remove it, so that if it's re-added it gets re-marked in the
* superblock:
*/
if (bch2_replicas_entry_validate(&r.e, c, &buf))
ret = bch2_is_zero(v, sizeof(v[0]) * acc->k.data[idx].nr_counters)
? -BCH_ERR_remove_disk_accounting_entry
: bch2_disk_accounting_validate_late(trans, acc_k,
v, acc->k.data[idx].nr_counters);
if (ret == -BCH_ERR_remove_disk_accounting_entry) {
free_percpu(acc->k.data[idx].v[0]);
free_percpu(acc->k.data[idx].v[1]);
darray_remove_item(&acc->k, &acc->k.data[idx]);
eytzinger0_sort(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]),
accounting_pos_cmp, NULL);
ret = 0;
continue;
struct disk_accounting_pos k;
bpos_to_disk_accounting_pos(&k, acc->k.data[i].pos);
if (fsck_err_on(!bch2_replicas_marked_locked(c, &r.e),
trans, accounting_replicas_not_marked,
"accounting not marked in superblock replicas\n %s",
(printbuf_reset(&buf),
bch2_accounting_key_to_text(&buf, &k),
buf.buf))) {
/*
* We're not RW yet and still single threaded, dropping
* and retaking lock is ok:
*/
percpu_up_read(&c->mark_lock);
ret = bch2_mark_replicas(c, &r.e);
if (ret)
goto fsck_err;
percpu_down_read(&c->mark_lock);
}
if (ret)
goto fsck_err;
i++;
}
preempt_disable();
......@@ -742,7 +821,7 @@ int bch2_accounting_read(struct bch_fs *c)
}
preempt_enable();
fsck_err:
percpu_up_read(&c->mark_lock);
percpu_up_write(&c->mark_lock);
err:
printbuf_exit(&buf);
bch2_trans_put(trans);
......
......@@ -268,7 +268,8 @@
x(BCH_ERR_nopromote, nopromote_no_writes) \
x(BCH_ERR_nopromote, nopromote_enomem) \
x(0, invalid_snapshot_node) \
x(0, option_needs_open_fs)
x(0, option_needs_open_fs) \
x(0, remove_disk_accounting_entry)
enum bch_errcode {
BCH_ERR_START = 2048,
......
......@@ -291,6 +291,7 @@ enum bch_fsck_flags {
x(alloc_key_stripe_sectors_wrong, 271, FSCK_AUTOFIX) \
x(accounting_mismatch, 272, FSCK_AUTOFIX) \
x(accounting_replicas_not_marked, 273, 0) \
x(accounting_to_invalid_device, 289, 0) \
x(invalid_btree_id, 274, 0) \
x(alloc_key_io_time_bad, 275, 0) \
x(alloc_key_fragmentation_lru_wrong, 276, FSCK_AUTOFIX) \
......@@ -300,7 +301,7 @@ enum bch_fsck_flags {
x(accounting_key_replicas_devs_unsorted, 280, FSCK_AUTOFIX) \
x(accounting_key_version_0, 282, FSCK_AUTOFIX) \
x(logged_op_but_clean, 283, FSCK_AUTOFIX) \
x(MAX, 289, 0)
x(MAX, 290, 0)
enum bch_sb_error_id {
#define x(t, n, ...) BCH_FSCK_ERR_##t = n,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment