Commit fcb3431b authored by Kent Overstreet, committed by Kent Overstreet

bcachefs: Redo checks for sufficient devices

When the replicas mechanism was added, for tracking data by which drives
it's replicated on, the check for whether we have sufficient devices was
never updated to make use of it. This patch finally does that.
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent 5d428c7c
......@@ -14,6 +14,9 @@
#define BCH_FORCE_IF_DATA_DEGRADED (1 << 2)
#define BCH_FORCE_IF_METADATA_DEGRADED (1 << 3)
/*
 * Both "lost" force bits (user data and metadata) combined.
 * bch2_have_enough_devs() raises the matching *_LOST bit for a replicas
 * entry when fewer of its devices are online than the entry requires;
 * passing this mask in @flags tolerates that condition.
 */
#define BCH_FORCE_IF_LOST \
(BCH_FORCE_IF_DATA_LOST| \
BCH_FORCE_IF_METADATA_LOST)
/*
 * Both "degraded" force bits (user data and metadata) combined.
 * bch2_have_enough_devs() raises the matching *_DEGRADED bit for a
 * replicas entry when not all of its devices are online; passing this
 * mask in @flags tolerates that condition.
 */
#define BCH_FORCE_IF_DEGRADED \
(BCH_FORCE_IF_DATA_DEGRADED| \
BCH_FORCE_IF_METADATA_DEGRADED)
......
......@@ -222,6 +222,11 @@ enum opt_type {
OPT_BOOL(), \
NO_SB_OPT, false, \
NULL, "Allow mounting in degraded mode") \
x(very_degraded, u8, \
OPT_MOUNT, \
OPT_BOOL(), \
NO_SB_OPT, false, \
NULL, "Allow mounting in when data will be missing") \
x(discard, u8, \
OPT_MOUNT|OPT_DEVICE, \
OPT_BOOL(), \
......
......@@ -967,94 +967,48 @@ const struct bch_sb_field_ops bch_sb_field_ops_replicas_v0 = {
/* Query replicas: */
struct replicas_status __bch2_replicas_status(struct bch_fs *c,
struct bch_devs_mask online_devs)
bool bch2_have_enough_devs(struct bch_fs *c, struct bch_devs_mask devs,
unsigned flags, bool print)
{
struct bch_sb_field_members *mi;
struct bch_replicas_entry *e;
unsigned i, nr_online, nr_offline;
struct replicas_status ret;
memset(&ret, 0, sizeof(ret));
for (i = 0; i < ARRAY_SIZE(ret.replicas); i++)
ret.replicas[i].redundancy = INT_MAX;
mi = bch2_sb_get_members(c->disk_sb.sb);
bool ret = true;
percpu_down_read(&c->mark_lock);
for_each_cpu_replicas_entry(&c->replicas, e) {
if (e->data_type >= ARRAY_SIZE(ret.replicas))
panic("e %p data_type %u\n", e, e->data_type);
unsigned i, nr_online = 0, dflags = 0;
bool metadata = e->data_type < BCH_DATA_user;
nr_online = nr_offline = 0;
for (i = 0; i < e->nr_devs; i++)
nr_online += test_bit(e->devs[i], devs.d);
for (i = 0; i < e->nr_devs; i++) {
BUG_ON(!bch2_dev_exists(c->disk_sb.sb, mi,
e->devs[i]));
if (nr_online < e->nr_required)
dflags |= metadata
? BCH_FORCE_IF_METADATA_LOST
: BCH_FORCE_IF_DATA_LOST;
if (test_bit(e->devs[i], online_devs.d))
nr_online++;
else
nr_offline++;
}
if (nr_online < e->nr_devs)
dflags |= metadata
? BCH_FORCE_IF_METADATA_DEGRADED
: BCH_FORCE_IF_DATA_DEGRADED;
ret.replicas[e->data_type].redundancy =
min(ret.replicas[e->data_type].redundancy,
(int) nr_online - (int) e->nr_required);
if (dflags & ~flags) {
if (print) {
char buf[100];
ret.replicas[e->data_type].nr_offline =
max(ret.replicas[e->data_type].nr_offline,
nr_offline);
}
bch2_replicas_entry_to_text(&PBUF(buf), e);
bch_err(c, "insufficient devices online (%u) for replicas entry %s",
nr_online, buf);
}
ret = false;
break;
}
}
percpu_up_read(&c->mark_lock);
for (i = 0; i < ARRAY_SIZE(ret.replicas); i++)
if (ret.replicas[i].redundancy == INT_MAX)
ret.replicas[i].redundancy = 0;
return ret;
}
struct replicas_status bch2_replicas_status(struct bch_fs *c)
{
return __bch2_replicas_status(c, bch2_online_devs(c));
}
static bool have_enough_devs(struct replicas_status s,
enum bch_data_type type,
bool force_if_degraded,
bool force_if_lost)
{
return (!s.replicas[type].nr_offline || force_if_degraded) &&
(s.replicas[type].redundancy >= 0 || force_if_lost);
}
bool bch2_have_enough_devs(struct replicas_status s, unsigned flags)
{
return (have_enough_devs(s, BCH_DATA_journal,
flags & BCH_FORCE_IF_METADATA_DEGRADED,
flags & BCH_FORCE_IF_METADATA_LOST) &&
have_enough_devs(s, BCH_DATA_btree,
flags & BCH_FORCE_IF_METADATA_DEGRADED,
flags & BCH_FORCE_IF_METADATA_LOST) &&
have_enough_devs(s, BCH_DATA_user,
flags & BCH_FORCE_IF_DATA_DEGRADED,
flags & BCH_FORCE_IF_DATA_LOST));
}
int bch2_replicas_online(struct bch_fs *c, bool meta)
{
struct replicas_status s = bch2_replicas_status(c);
return (meta
? min(s.replicas[BCH_DATA_journal].redundancy,
s.replicas[BCH_DATA_btree].redundancy)
: s.replicas[BCH_DATA_user].redundancy) + 1;
}
unsigned bch2_dev_has_data(struct bch_fs *c, struct bch_dev *ca)
{
struct bch_replicas_entry *e;
......
......@@ -39,19 +39,9 @@ static inline void bch2_replicas_entry_cached(struct bch_replicas_entry *e,
e->devs[0] = dev;
}
struct replicas_status {
struct {
int redundancy;
unsigned nr_offline;
} replicas[BCH_DATA_NR];
};
struct replicas_status __bch2_replicas_status(struct bch_fs *,
struct bch_devs_mask);
struct replicas_status bch2_replicas_status(struct bch_fs *);
bool bch2_have_enough_devs(struct replicas_status, unsigned);
int bch2_replicas_online(struct bch_fs *, bool);
bool bch2_have_enough_devs(struct bch_fs *, struct bch_devs_mask,
unsigned, bool);
unsigned bch2_dev_has_data(struct bch_fs *, struct bch_dev *);
int bch2_replicas_gc_end(struct bch_fs *, int);
......
......@@ -770,15 +770,13 @@ int bch2_write_super(struct bch_fs *c)
nr_wrote = dev_mask_nr(&sb_written);
can_mount_with_written =
bch2_have_enough_devs(__bch2_replicas_status(c, sb_written),
BCH_FORCE_IF_DEGRADED);
bch2_have_enough_devs(c, sb_written, BCH_FORCE_IF_DEGRADED, false);
for (i = 0; i < ARRAY_SIZE(sb_written.d); i++)
sb_written.d[i] = ~sb_written.d[i];
can_mount_without_written =
bch2_have_enough_devs(__bch2_replicas_status(c, sb_written),
BCH_FORCE_IF_DEGRADED);
bch2_have_enough_devs(c, sb_written, BCH_FORCE_IF_DEGRADED, false);
/*
* If we would be able to mount _without_ the devices we successfully
......@@ -789,6 +787,7 @@ int bch2_write_super(struct bch_fs *c)
* mount with the devices we did successfully write to:
*/
if (bch2_fs_fatal_err_on(!nr_wrote ||
!can_mount_with_written ||
(can_mount_without_written &&
!can_mount_with_written), c,
"Unable to write superblock to sufficient devices"))
......
......@@ -1265,7 +1265,6 @@ bool bch2_dev_state_allowed(struct bch_fs *c, struct bch_dev *ca,
enum bch_member_state new_state, int flags)
{
struct bch_devs_mask new_online_devs;
struct replicas_status s;
struct bch_dev *ca2;
int i, nr_rw = 0, required;
......@@ -1301,9 +1300,7 @@ bool bch2_dev_state_allowed(struct bch_fs *c, struct bch_dev *ca,
new_online_devs = bch2_online_devs(c);
__clear_bit(ca->dev_idx, new_online_devs.d);
s = __bch2_replicas_status(c, new_online_devs);
return bch2_have_enough_devs(s, flags);
return bch2_have_enough_devs(c, new_online_devs, flags, false);
default:
BUG();
}
......@@ -1311,14 +1308,18 @@ bool bch2_dev_state_allowed(struct bch_fs *c, struct bch_dev *ca,
static bool bch2_fs_may_start(struct bch_fs *c)
{
struct replicas_status s;
struct bch_sb_field_members *mi;
struct bch_dev *ca;
unsigned i, flags = c->opts.degraded
? BCH_FORCE_IF_DEGRADED
: 0;
unsigned i, flags = 0;
if (c->opts.very_degraded)
flags |= BCH_FORCE_IF_DEGRADED|BCH_FORCE_IF_LOST;
if (!c->opts.degraded) {
if (c->opts.degraded)
flags |= BCH_FORCE_IF_DEGRADED;
if (!c->opts.degraded &&
!c->opts.very_degraded) {
mutex_lock(&c->sb_lock);
mi = bch2_sb_get_members(c->disk_sb.sb);
......@@ -1338,9 +1339,7 @@ static bool bch2_fs_may_start(struct bch_fs *c)
mutex_unlock(&c->sb_lock);
}
s = bch2_replicas_status(c);
return bch2_have_enough_devs(s, flags);
return bch2_have_enough_devs(c, bch2_online_devs(c), flags, true);
}
static void __bch2_dev_read_only(struct bch_fs *c, struct bch_dev *ca)
......
......@@ -199,9 +199,6 @@ read_attribute(new_stripes);
rw_attribute(pd_controllers_update_seconds);
read_attribute(meta_replicas_have);
read_attribute(data_replicas_have);
read_attribute(io_timers_read);
read_attribute(io_timers_write);
......@@ -347,9 +344,6 @@ SHOW(bch2_fs)
sysfs_print(promote_whole_extents, c->promote_whole_extents);
sysfs_printf(meta_replicas_have, "%i", bch2_replicas_online(c, true));
sysfs_printf(data_replicas_have, "%i", bch2_replicas_online(c, false));
/* Debugging: */
if (attr == &sysfs_alloc_debug)
......@@ -520,9 +514,6 @@ struct attribute *bch2_fs_files[] = {
&sysfs_btree_node_size,
&sysfs_btree_cache_size,
&sysfs_meta_replicas_have,
&sysfs_data_replicas_have,
&sysfs_journal_write_delay_ms,
&sysfs_journal_reclaim_delay_ms,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment