Commit dfe9bfb3 authored by Kent Overstreet, committed by Kent Overstreet

bcachefs: Stripes now properly subject to gc

gc now verifies the contents of the stripes radix tree, important for
persistent alloc info
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent ad7ae8d6
...@@ -225,6 +225,8 @@ ...@@ -225,6 +225,8 @@
printk(KERN_WARNING bch2_fmt(c, fmt), ##__VA_ARGS__) printk(KERN_WARNING bch2_fmt(c, fmt), ##__VA_ARGS__)
#define bch_err(c, fmt, ...) \ #define bch_err(c, fmt, ...) \
printk(KERN_ERR bch2_fmt(c, fmt), ##__VA_ARGS__) printk(KERN_ERR bch2_fmt(c, fmt), ##__VA_ARGS__)
#define bch_err_ratelimited(c, fmt, ...) \
printk_ratelimited(KERN_ERR bch2_fmt(c, fmt), ##__VA_ARGS__)
#define bch_verbose(c, fmt, ...) \ #define bch_verbose(c, fmt, ...) \
do { \ do { \
...@@ -334,6 +336,7 @@ enum bch_time_stats { ...@@ -334,6 +336,7 @@ enum bch_time_stats {
struct btree; struct btree;
enum gc_phase { enum gc_phase {
GC_PHASE_NOT_RUNNING,
GC_PHASE_START, GC_PHASE_START,
GC_PHASE_SB, GC_PHASE_SB,
...@@ -687,16 +690,17 @@ struct bch_fs { ...@@ -687,16 +690,17 @@ struct bch_fs {
/* REBALANCE */ /* REBALANCE */
struct bch_fs_rebalance rebalance; struct bch_fs_rebalance rebalance;
/* ERASURE CODING */ /* STRIPES: */
struct list_head ec_new_stripe_list; GENRADIX(struct stripe) stripes[2];
struct mutex ec_new_stripe_lock; struct mutex ec_stripe_create_lock;
GENRADIX(struct ec_stripe) ec_stripes;
struct mutex ec_stripes_lock;
ec_stripes_heap ec_stripes_heap; ec_stripes_heap ec_stripes_heap;
spinlock_t ec_stripes_heap_lock; spinlock_t ec_stripes_heap_lock;
/* ERASURE CODING */
struct list_head ec_new_stripe_list;
struct mutex ec_new_stripe_lock;
struct bio_set ec_bioset; struct bio_set ec_bioset;
struct work_struct ec_stripe_delete_work; struct work_struct ec_stripe_delete_work;
......
...@@ -332,9 +332,6 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id, ...@@ -332,9 +332,6 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id,
gc_pos_set(c, gc_pos_btree(btree_id, POS_MIN, 0)); gc_pos_set(c, gc_pos_btree(btree_id, POS_MIN, 0));
if (!c->btree_roots[btree_id].b)
return 0;
/* /*
* if expensive_debug_checks is on, run range_checks on all leaf nodes: * if expensive_debug_checks is on, run range_checks on all leaf nodes:
* *
...@@ -582,6 +579,8 @@ static void bch2_gc_free(struct bch_fs *c) ...@@ -582,6 +579,8 @@ static void bch2_gc_free(struct bch_fs *c)
struct bch_dev *ca; struct bch_dev *ca;
unsigned i; unsigned i;
genradix_free(&c->stripes[1]);
for_each_member_device(ca, c, i) { for_each_member_device(ca, c, i) {
kvpfree(rcu_dereference_protected(ca->buckets[1], 1), kvpfree(rcu_dereference_protected(ca->buckets[1], 1),
sizeof(struct bucket_array) + sizeof(struct bucket_array) +
...@@ -602,6 +601,25 @@ static void bch2_gc_done_nocheck(struct bch_fs *c) ...@@ -602,6 +601,25 @@ static void bch2_gc_done_nocheck(struct bch_fs *c)
unsigned i; unsigned i;
int cpu; int cpu;
{
struct genradix_iter dst_iter = genradix_iter_init(&c->stripes[0], 0);
struct genradix_iter src_iter = genradix_iter_init(&c->stripes[1], 0);
struct stripe *dst, *src;
c->ec_stripes_heap.used = 0;
while ((dst = genradix_iter_peek(&dst_iter, &c->stripes[0])) &&
(src = genradix_iter_peek(&src_iter, &c->stripes[1]))) {
*dst = *src;
if (dst->alive)
bch2_stripes_heap_insert(c, dst, dst_iter.pos);
genradix_iter_advance(&dst_iter, &c->stripes[0]);
genradix_iter_advance(&src_iter, &c->stripes[1]);
}
}
for_each_member_device(ca, c, i) { for_each_member_device(ca, c, i) {
struct bucket_array *src = __bucket_array(ca, 1); struct bucket_array *src = __bucket_array(ca, 1);
...@@ -649,13 +667,21 @@ static void bch2_gc_done(struct bch_fs *c, bool initial) ...@@ -649,13 +667,21 @@ static void bch2_gc_done(struct bch_fs *c, bool initial)
#define copy_field(_f, _msg, ...) \ #define copy_field(_f, _msg, ...) \
if (dst._f != src._f) { \ if (dst._f != src._f) { \
pr_info(_msg ": got %llu, should be %llu, fixing" \ bch_err(c, _msg ": got %llu, should be %llu, fixing"\
, ##__VA_ARGS__, dst._f, src._f); \ , ##__VA_ARGS__, dst._f, src._f); \
dst._f = src._f; \ dst._f = src._f; \
} }
#define copy_stripe_field(_f, _msg, ...) \
if (dst->_f != src->_f) { \
bch_err_ratelimited(c, "stripe %zu has wrong "_msg \
": got %u, should be %u, fixing", \
dst_iter.pos, ##__VA_ARGS__, \
dst->_f, src->_f); \
dst->_f = src->_f; \
}
#define copy_bucket_field(_f) \ #define copy_bucket_field(_f) \
if (dst->b[b].mark._f != src->b[b].mark._f) { \ if (dst->b[b].mark._f != src->b[b].mark._f) { \
pr_info("dev %u bucket %zu has wrong " #_f \ bch_err_ratelimited(c, "dev %u bucket %zu has wrong " #_f\
": got %u, should be %u, fixing", \ ": got %u, should be %u, fixing", \
i, b, dst->b[b].mark._f, src->b[b].mark._f); \ i, b, dst->b[b].mark._f, src->b[b].mark._f); \
dst->b[b]._mark._f = src->b[b].mark._f; \ dst->b[b]._mark._f = src->b[b].mark._f; \
...@@ -672,6 +698,36 @@ static void bch2_gc_done(struct bch_fs *c, bool initial) ...@@ -672,6 +698,36 @@ static void bch2_gc_done(struct bch_fs *c, bool initial)
goto out; goto out;
} }
{
struct genradix_iter dst_iter = genradix_iter_init(&c->stripes[0], 0);
struct genradix_iter src_iter = genradix_iter_init(&c->stripes[1], 0);
struct stripe *dst, *src;
unsigned i;
c->ec_stripes_heap.used = 0;
while ((dst = genradix_iter_peek(&dst_iter, &c->stripes[0])) &&
(src = genradix_iter_peek(&src_iter, &c->stripes[1]))) {
copy_stripe_field(alive, "alive");
copy_stripe_field(sectors, "sectors");
copy_stripe_field(algorithm, "algorithm");
copy_stripe_field(nr_blocks, "nr_blocks");
copy_stripe_field(nr_redundant, "nr_redundant");
copy_stripe_field(blocks_nonempty.counter,
"blocks_nonempty");
for (i = 0; i < ARRAY_SIZE(dst->block_sectors); i++)
copy_stripe_field(block_sectors[i].counter,
"block_sectors[%u]", i);
if (dst->alive)
bch2_stripes_heap_insert(c, dst, dst_iter.pos);
genradix_iter_advance(&dst_iter, &c->stripes[0]);
genradix_iter_advance(&src_iter, &c->stripes[1]);
}
}
for_each_member_device(ca, c, i) { for_each_member_device(ca, c, i) {
struct bucket_array *dst = __bucket_array(ca, 0); struct bucket_array *dst = __bucket_array(ca, 0);
struct bucket_array *src = __bucket_array(ca, 1); struct bucket_array *src = __bucket_array(ca, 1);
...@@ -756,10 +812,11 @@ static void bch2_gc_done(struct bch_fs *c, bool initial) ...@@ -756,10 +812,11 @@ static void bch2_gc_done(struct bch_fs *c, bool initial)
out: out:
percpu_up_write(&c->usage_lock); percpu_up_write(&c->usage_lock);
#undef copy_field
#undef copy_fs_field #undef copy_fs_field
#undef copy_dev_field #undef copy_dev_field
#undef copy_bucket_field #undef copy_bucket_field
#undef copy_stripe_field
#undef copy_field
} }
static int bch2_gc_start(struct bch_fs *c) static int bch2_gc_start(struct bch_fs *c)
...@@ -767,6 +824,12 @@ static int bch2_gc_start(struct bch_fs *c) ...@@ -767,6 +824,12 @@ static int bch2_gc_start(struct bch_fs *c)
struct bch_dev *ca; struct bch_dev *ca;
unsigned i; unsigned i;
/*
* indicate to stripe code that we need to allocate for the gc stripes
* radix tree, too
*/
gc_pos_set(c, gc_phase(GC_PHASE_START));
BUG_ON(c->usage[1]); BUG_ON(c->usage[1]);
c->usage[1] = alloc_percpu(struct bch_fs_usage); c->usage[1] = alloc_percpu(struct bch_fs_usage);
...@@ -808,7 +871,7 @@ static int bch2_gc_start(struct bch_fs *c) ...@@ -808,7 +871,7 @@ static int bch2_gc_start(struct bch_fs *c)
percpu_up_write(&c->usage_lock); percpu_up_write(&c->usage_lock);
return 0; return bch2_ec_mem_alloc(c, true);
} }
/** /**
...@@ -873,7 +936,7 @@ int bch2_gc(struct bch_fs *c, struct list_head *journal, bool initial) ...@@ -873,7 +936,7 @@ int bch2_gc(struct bch_fs *c, struct list_head *journal, bool initial)
bch2_gc_done(c, initial); bch2_gc_done(c, initial);
/* Indicates that gc is no longer in progress: */ /* Indicates that gc is no longer in progress: */
__gc_pos_set(c, gc_phase(GC_PHASE_START)); __gc_pos_set(c, gc_phase(GC_PHASE_NOT_RUNNING));
bch2_gc_free(c); bch2_gc_free(c);
up_write(&c->gc_lock); up_write(&c->gc_lock);
......
...@@ -303,7 +303,7 @@ static inline int is_fragmented_bucket(struct bucket_mark m, ...@@ -303,7 +303,7 @@ static inline int is_fragmented_bucket(struct bucket_mark m,
static inline enum bch_data_type bucket_type(struct bucket_mark m) static inline enum bch_data_type bucket_type(struct bucket_mark m)
{ {
return m.cached_sectors && !m.dirty_sectors return m.cached_sectors && !m.dirty_sectors
? BCH_DATA_CACHED ? BCH_DATA_CACHED
: m.data_type; : m.data_type;
} }
...@@ -375,14 +375,14 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca, ...@@ -375,14 +375,14 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
preempt_disable(); preempt_disable();
dev_usage = this_cpu_ptr(ca->usage[gc]); dev_usage = this_cpu_ptr(ca->usage[gc]);
if (bucket_type(old) != bucket_type(new)) { if (bucket_type(old)) {
if (bucket_type(old)) { fs_usage->buckets[bucket_type(old)] -= ca->mi.bucket_size;
fs_usage->buckets[bucket_type(old)] -= ca->mi.bucket_size; dev_usage->buckets[bucket_type(old)]--;
dev_usage->buckets[bucket_type(old)]--; }
} else {
fs_usage->buckets[bucket_type(new)] += ca->mi.bucket_size; if (bucket_type(new)) {
dev_usage->buckets[bucket_type(new)]++; fs_usage->buckets[bucket_type(new)] += ca->mi.bucket_size;
} dev_usage->buckets[bucket_type(new)]++;
} }
dev_usage->buckets_alloc += dev_usage->buckets_alloc +=
...@@ -406,11 +406,11 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca, ...@@ -406,11 +406,11 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
bch2_dev_stats_verify(ca); bch2_dev_stats_verify(ca);
} }
#define bucket_data_cmpxchg(c, ca, stats, g, new, expr) \ #define bucket_data_cmpxchg(c, ca, fs_usage, g, new, expr) \
({ \ ({ \
struct bucket_mark _old = bucket_cmpxchg(g, new, expr); \ struct bucket_mark _old = bucket_cmpxchg(g, new, expr); \
\ \
bch2_dev_usage_update(c, ca, stats, _old, new, gc); \ bch2_dev_usage_update(c, ca, fs_usage, _old, new, gc); \
_old; \ _old; \
}) })
...@@ -638,23 +638,25 @@ static void bch2_mark_pointer(struct bch_fs *c, ...@@ -638,23 +638,25 @@ static void bch2_mark_pointer(struct bch_fs *c,
BUG_ON(!gc && bucket_became_unavailable(old, new)); BUG_ON(!gc && bucket_became_unavailable(old, new));
} }
static void bch2_mark_stripe_ptr(struct bch_fs *c, static int bch2_mark_stripe_ptr(struct bch_fs *c,
struct bch_extent_stripe_ptr p, struct bch_extent_stripe_ptr p,
s64 sectors, unsigned flags, s64 sectors, unsigned flags,
s64 *adjusted_disk_sectors, s64 *adjusted_disk_sectors,
unsigned *redundancy) unsigned *redundancy,
bool gc)
{ {
struct ec_stripe *m; struct stripe *m;
unsigned old, new, nr_data; unsigned old, new, nr_data;
int blocks_nonempty_delta; int blocks_nonempty_delta;
s64 parity_sectors; s64 parity_sectors;
m = genradix_ptr(&c->ec_stripes, p.idx); m = genradix_ptr(&c->stripes[gc], p.idx);
if (WARN_ON(!m))
return;
if (WARN_ON(!m->alive)) if (!m || !m->alive) {
return; bch_err_ratelimited(c, "pointer to nonexistent stripe %llu",
(u64) p.idx);
return -1;
}
nr_data = m->nr_blocks - m->nr_redundant; nr_data = m->nr_blocks - m->nr_redundant;
...@@ -672,20 +674,23 @@ static void bch2_mark_stripe_ptr(struct bch_fs *c, ...@@ -672,20 +674,23 @@ static void bch2_mark_stripe_ptr(struct bch_fs *c,
blocks_nonempty_delta = (int) !!new - (int) !!old; blocks_nonempty_delta = (int) !!new - (int) !!old;
if (!blocks_nonempty_delta) if (!blocks_nonempty_delta)
return; return 0;
atomic_add(blocks_nonempty_delta, &m->blocks_nonempty); atomic_add(blocks_nonempty_delta, &m->blocks_nonempty);
BUG_ON(atomic_read(&m->blocks_nonempty) < 0); BUG_ON(atomic_read(&m->blocks_nonempty) < 0);
bch2_stripes_heap_update(c, m, p.idx); if (!gc)
bch2_stripes_heap_update(c, m, p.idx);
return 0;
} }
static void bch2_mark_extent(struct bch_fs *c, struct bkey_s_c k, static int bch2_mark_extent(struct bch_fs *c, struct bkey_s_c k,
s64 sectors, enum bch_data_type data_type, s64 sectors, enum bch_data_type data_type,
struct bch_fs_usage *stats, struct bch_fs_usage *stats,
u64 journal_seq, unsigned flags, u64 journal_seq, unsigned flags,
bool gc) bool gc)
{ {
BUG_ON(!sectors); BUG_ON(!sectors);
...@@ -701,6 +706,7 @@ static void bch2_mark_extent(struct bch_fs *c, struct bkey_s_c k, ...@@ -701,6 +706,7 @@ static void bch2_mark_extent(struct bch_fs *c, struct bkey_s_c k,
unsigned replicas = 0; unsigned replicas = 0;
unsigned ec_redundancy = 0; unsigned ec_redundancy = 0;
unsigned i; unsigned i;
int ret;
extent_for_each_ptr_decode(e, p, entry) { extent_for_each_ptr_decode(e, p, entry) {
s64 disk_sectors = ptr_disk_sectors(e, p, sectors); s64 disk_sectors = ptr_disk_sectors(e, p, sectors);
...@@ -710,11 +716,14 @@ static void bch2_mark_extent(struct bch_fs *c, struct bkey_s_c k, ...@@ -710,11 +716,14 @@ static void bch2_mark_extent(struct bch_fs *c, struct bkey_s_c k,
stats, journal_seq, flags, gc); stats, journal_seq, flags, gc);
if (!p.ptr.cached) if (!p.ptr.cached)
for (i = 0; i < p.ec_nr; i++) for (i = 0; i < p.ec_nr; i++) {
bch2_mark_stripe_ptr(c, p.ec[i], ret = bch2_mark_stripe_ptr(c, p.ec[i],
disk_sectors, flags, disk_sectors, flags,
&adjusted_disk_sectors, &adjusted_disk_sectors,
&ec_redundancy); &ec_redundancy, gc);
if (ret)
return ret;
}
if (!p.ptr.cached) if (!p.ptr.cached)
replicas++; replicas++;
...@@ -747,6 +756,8 @@ static void bch2_mark_extent(struct bch_fs *c, struct bkey_s_c k, ...@@ -747,6 +756,8 @@ static void bch2_mark_extent(struct bch_fs *c, struct bkey_s_c k,
break; break;
} }
} }
return 0;
} }
static void bucket_set_stripe(struct bch_fs *c, static void bucket_set_stripe(struct bch_fs *c,
...@@ -767,7 +778,7 @@ static void bucket_set_stripe(struct bch_fs *c, ...@@ -767,7 +778,7 @@ static void bucket_set_stripe(struct bch_fs *c,
BUG_ON(ptr_stale(ca, ptr)); BUG_ON(ptr_stale(ca, ptr));
old = bucket_cmpxchg(g, new, ({ old = bucket_data_cmpxchg(c, ca, fs_usage, g, new, ({
new.stripe = enabled; new.stripe = enabled;
if (journal_seq) { if (journal_seq) {
new.journal_seq_valid = 1; new.journal_seq_valid = 1;
...@@ -776,26 +787,33 @@ static void bucket_set_stripe(struct bch_fs *c, ...@@ -776,26 +787,33 @@ static void bucket_set_stripe(struct bch_fs *c,
})); }));
BUG_ON(old.stripe == enabled); BUG_ON(old.stripe == enabled);
bch2_dev_usage_update(c, ca, fs_usage, old, new, gc);
} }
} }
static void bch2_mark_stripe(struct bch_fs *c, struct bkey_s_c k, static int bch2_mark_stripe(struct bch_fs *c, struct bkey_s_c k,
bool inserting, bool inserting,
struct bch_fs_usage *fs_usage, struct bch_fs_usage *fs_usage,
u64 journal_seq, unsigned flags, u64 journal_seq, unsigned flags,
bool gc) bool gc)
{ {
switch (k.k->type) { switch (k.k->type) {
case BCH_STRIPE: { case BCH_STRIPE: {
struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k); struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k);
size_t idx = s.k->p.offset; size_t idx = s.k->p.offset;
struct ec_stripe *m = genradix_ptr(&c->ec_stripes, idx); struct stripe *m = genradix_ptr(&c->stripes[gc], idx);
unsigned i; unsigned i;
BUG_ON(!m); if (!m || (!inserting && !m->alive)) {
BUG_ON(m->alive == inserting); bch_err_ratelimited(c, "error marking nonexistent stripe %zu",
idx);
return -1;
}
if (inserting && m->alive) {
bch_err_ratelimited(c, "error marking stripe %zu: already exists",
idx);
return -1;
}
BUG_ON(atomic_read(&m->blocks_nonempty)); BUG_ON(atomic_read(&m->blocks_nonempty));
...@@ -809,70 +827,88 @@ static void bch2_mark_stripe(struct bch_fs *c, struct bkey_s_c k, ...@@ -809,70 +827,88 @@ static void bch2_mark_stripe(struct bch_fs *c, struct bkey_s_c k,
m->nr_redundant = s.v->nr_redundant; m->nr_redundant = s.v->nr_redundant;
} }
if (inserting) if (!gc) {
bch2_stripes_heap_insert(c, m, idx); if (inserting)
else bch2_stripes_heap_insert(c, m, idx);
bch2_stripes_heap_del(c, m, idx); else
bch2_stripes_heap_del(c, m, idx);
} else {
m->alive = inserting;
}
bucket_set_stripe(c, s.v, inserting, fs_usage, 0, gc); bucket_set_stripe(c, s.v, inserting, fs_usage, 0, gc);
break; break;
} }
} }
return 0;
} }
static void __bch2_mark_key(struct bch_fs *c, static int __bch2_mark_key(struct bch_fs *c,
enum bkey_type type, struct bkey_s_c k, enum bkey_type type, struct bkey_s_c k,
bool inserting, s64 sectors, bool inserting, s64 sectors,
struct bch_fs_usage *stats, struct bch_fs_usage *stats,
u64 journal_seq, unsigned flags, u64 journal_seq, unsigned flags,
bool gc) bool gc)
{ {
int ret = 0;
switch (type) { switch (type) {
case BKEY_TYPE_BTREE: case BKEY_TYPE_BTREE:
bch2_mark_extent(c, k, inserting ret = bch2_mark_extent(c, k, inserting
? c->opts.btree_node_size ? c->opts.btree_node_size
: -c->opts.btree_node_size, : -c->opts.btree_node_size,
BCH_DATA_BTREE, BCH_DATA_BTREE,
stats, journal_seq, flags, gc); stats, journal_seq, flags, gc);
break; break;
case BKEY_TYPE_EXTENTS: case BKEY_TYPE_EXTENTS:
bch2_mark_extent(c, k, sectors, BCH_DATA_USER, ret = bch2_mark_extent(c, k, sectors, BCH_DATA_USER,
stats, journal_seq, flags, gc); stats, journal_seq, flags, gc);
break; break;
case BKEY_TYPE_EC: case BKEY_TYPE_EC:
bch2_mark_stripe(c, k, inserting, ret = bch2_mark_stripe(c, k, inserting,
stats, journal_seq, flags, gc); stats, journal_seq, flags, gc);
break; break;
default: default:
break; break;
} }
return ret;
} }
void bch2_mark_key(struct bch_fs *c, int bch2_mark_key(struct bch_fs *c,
enum bkey_type type, struct bkey_s_c k, enum bkey_type type, struct bkey_s_c k,
bool inserting, s64 sectors, bool inserting, s64 sectors,
struct gc_pos pos, struct gc_pos pos,
struct bch_fs_usage *stats, struct bch_fs_usage *stats,
u64 journal_seq, unsigned flags) u64 journal_seq, unsigned flags)
{ {
int ret = 0;
percpu_down_read(&c->usage_lock); percpu_down_read(&c->usage_lock);
if (!(flags & BCH_BUCKET_MARK_GC)) { if (!(flags & BCH_BUCKET_MARK_GC)) {
if (!stats) if (!stats)
stats = this_cpu_ptr(c->usage[0]); stats = this_cpu_ptr(c->usage[0]);
__bch2_mark_key(c, type, k, inserting, sectors, ret = __bch2_mark_key(c, type, k, inserting, sectors,
stats, journal_seq, flags, false); stats, journal_seq, flags, false);
if (ret)
goto out;
} }
if ((flags & BCH_BUCKET_MARK_GC) || if ((flags & BCH_BUCKET_MARK_GC) ||
gc_visited(c, pos)) { gc_visited(c, pos)) {
__bch2_mark_key(c, type, k, inserting, sectors, ret = __bch2_mark_key(c, type, k, inserting, sectors,
this_cpu_ptr(c->usage[1]), this_cpu_ptr(c->usage[1]),
journal_seq, flags, true); journal_seq, flags, true);
if (ret)
goto out;
} }
out:
percpu_up_read(&c->usage_lock); percpu_up_read(&c->usage_lock);
return ret;
} }
void bch2_mark_update(struct btree_insert *trans, void bch2_mark_update(struct btree_insert *trans,
......
...@@ -220,9 +220,9 @@ void bch2_mark_metadata_bucket(struct bch_fs *, struct bch_dev *, ...@@ -220,9 +220,9 @@ void bch2_mark_metadata_bucket(struct bch_fs *, struct bch_dev *,
#define BCH_BUCKET_MARK_NOATOMIC (1 << 0) #define BCH_BUCKET_MARK_NOATOMIC (1 << 0)
#define BCH_BUCKET_MARK_GC (1 << 1) #define BCH_BUCKET_MARK_GC (1 << 1)
void bch2_mark_key(struct bch_fs *, enum bkey_type, struct bkey_s_c, int bch2_mark_key(struct bch_fs *, enum bkey_type, struct bkey_s_c,
bool, s64, struct gc_pos, bool, s64, struct gc_pos,
struct bch_fs_usage *, u64, unsigned); struct bch_fs_usage *, u64, unsigned);
void bch2_mark_update(struct btree_insert *, struct btree_insert_entry *); void bch2_mark_update(struct btree_insert *, struct btree_insert_entry *);
void __bch2_disk_reservation_put(struct bch_fs *, struct disk_reservation *); void __bch2_disk_reservation_put(struct bch_fs *, struct disk_reservation *);
......
...@@ -530,7 +530,7 @@ int bch2_ec_read_extent(struct bch_fs *c, struct bch_read_bio *rbio) ...@@ -530,7 +530,7 @@ int bch2_ec_read_extent(struct bch_fs *c, struct bch_read_bio *rbio)
return ret; return ret;
} }
/* ec_stripe bucket accounting: */ /* stripe bucket accounting: */
static int __ec_stripe_mem_alloc(struct bch_fs *c, size_t idx, gfp_t gfp) static int __ec_stripe_mem_alloc(struct bch_fs *c, size_t idx, gfp_t gfp)
{ {
...@@ -551,7 +551,11 @@ static int __ec_stripe_mem_alloc(struct bch_fs *c, size_t idx, gfp_t gfp) ...@@ -551,7 +551,11 @@ static int __ec_stripe_mem_alloc(struct bch_fs *c, size_t idx, gfp_t gfp)
free_heap(&n); free_heap(&n);
} }
if (!genradix_ptr_alloc(&c->ec_stripes, idx, gfp)) if (!genradix_ptr_alloc(&c->stripes[0], idx, gfp))
return -ENOMEM;
if (c->gc_pos.phase != GC_PHASE_NOT_RUNNING &&
!genradix_ptr_alloc(&c->stripes[1], idx, gfp))
return -ENOMEM; return -ENOMEM;
return 0; return 0;
...@@ -592,27 +596,26 @@ static inline void ec_stripes_heap_set_backpointer(ec_stripes_heap *h, ...@@ -592,27 +596,26 @@ static inline void ec_stripes_heap_set_backpointer(ec_stripes_heap *h,
{ {
struct bch_fs *c = container_of(h, struct bch_fs, ec_stripes_heap); struct bch_fs *c = container_of(h, struct bch_fs, ec_stripes_heap);
genradix_ptr(&c->ec_stripes, h->data[i].idx)->heap_idx = i; genradix_ptr(&c->stripes[0], h->data[i].idx)->heap_idx = i;
} }
static void heap_verify_backpointer(struct bch_fs *c, size_t idx) static void heap_verify_backpointer(struct bch_fs *c, size_t idx)
{ {
ec_stripes_heap *h = &c->ec_stripes_heap; ec_stripes_heap *h = &c->ec_stripes_heap;
struct ec_stripe *m = genradix_ptr(&c->ec_stripes, idx); struct stripe *m = genradix_ptr(&c->stripes[0], idx);
BUG_ON(!m->alive); BUG_ON(!m->alive);
BUG_ON(m->heap_idx >= h->used); BUG_ON(m->heap_idx >= h->used);
BUG_ON(h->data[m->heap_idx].idx != idx); BUG_ON(h->data[m->heap_idx].idx != idx);
} }
static inline unsigned stripe_entry_blocks(struct ec_stripe *m) static inline unsigned stripe_entry_blocks(struct stripe *m)
{ {
return atomic_read(&m->pin) return atomic_read(&m->blocks_nonempty);
? UINT_MAX : atomic_read(&m->blocks_nonempty);
} }
void bch2_stripes_heap_update(struct bch_fs *c, void bch2_stripes_heap_update(struct bch_fs *c,
struct ec_stripe *m, size_t idx) struct stripe *m, size_t idx)
{ {
ec_stripes_heap *h = &c->ec_stripes_heap; ec_stripes_heap *h = &c->ec_stripes_heap;
bool queue_delete; bool queue_delete;
...@@ -646,7 +649,7 @@ void bch2_stripes_heap_update(struct bch_fs *c, ...@@ -646,7 +649,7 @@ void bch2_stripes_heap_update(struct bch_fs *c,
} }
void bch2_stripes_heap_del(struct bch_fs *c, void bch2_stripes_heap_del(struct bch_fs *c,
struct ec_stripe *m, size_t idx) struct stripe *m, size_t idx)
{ {
spin_lock(&c->ec_stripes_heap_lock); spin_lock(&c->ec_stripes_heap_lock);
heap_verify_backpointer(c, idx); heap_verify_backpointer(c, idx);
...@@ -659,7 +662,7 @@ void bch2_stripes_heap_del(struct bch_fs *c, ...@@ -659,7 +662,7 @@ void bch2_stripes_heap_del(struct bch_fs *c,
} }
void bch2_stripes_heap_insert(struct bch_fs *c, void bch2_stripes_heap_insert(struct bch_fs *c,
struct ec_stripe *m, size_t idx) struct stripe *m, size_t idx)
{ {
spin_lock(&c->ec_stripes_heap_lock); spin_lock(&c->ec_stripes_heap_lock);
...@@ -678,7 +681,9 @@ void bch2_stripes_heap_insert(struct bch_fs *c, ...@@ -678,7 +681,9 @@ void bch2_stripes_heap_insert(struct bch_fs *c,
spin_unlock(&c->ec_stripes_heap_lock); spin_unlock(&c->ec_stripes_heap_lock);
} }
static void ec_stripe_delete(struct bch_fs *c, unsigned idx) /* stripe deletion */
static void ec_stripe_delete(struct bch_fs *c, size_t idx)
{ {
struct btree_iter iter; struct btree_iter iter;
struct bch_stripe *v = NULL; struct bch_stripe *v = NULL;
...@@ -717,6 +722,7 @@ static void ec_stripe_delete_work(struct work_struct *work) ...@@ -717,6 +722,7 @@ static void ec_stripe_delete_work(struct work_struct *work)
ssize_t idx; ssize_t idx;
down_read(&c->gc_lock); down_read(&c->gc_lock);
mutex_lock(&c->ec_stripe_create_lock);
while (1) { while (1) {
spin_lock(&c->ec_stripes_heap_lock); spin_lock(&c->ec_stripes_heap_lock);
...@@ -729,13 +735,15 @@ static void ec_stripe_delete_work(struct work_struct *work) ...@@ -729,13 +735,15 @@ static void ec_stripe_delete_work(struct work_struct *work)
ec_stripe_delete(c, idx); ec_stripe_delete(c, idx);
} }
mutex_unlock(&c->ec_stripe_create_lock);
up_read(&c->gc_lock); up_read(&c->gc_lock);
} }
/* stripe creation: */
static int ec_stripe_bkey_insert(struct bch_fs *c, static int ec_stripe_bkey_insert(struct bch_fs *c,
struct bkey_i_stripe *stripe) struct bkey_i_stripe *stripe)
{ {
struct ec_stripe *m;
struct btree_iter iter; struct btree_iter iter;
struct bkey_s_c k; struct bkey_s_c k;
int ret; int ret;
...@@ -755,18 +763,13 @@ static int ec_stripe_bkey_insert(struct bch_fs *c, ...@@ -755,18 +763,13 @@ static int ec_stripe_bkey_insert(struct bch_fs *c,
return bch2_btree_iter_unlock(&iter) ?: -ENOSPC; return bch2_btree_iter_unlock(&iter) ?: -ENOSPC;
found_slot: found_slot:
mutex_lock(&c->ec_stripes_lock);
ret = ec_stripe_mem_alloc(c, &iter); ret = ec_stripe_mem_alloc(c, &iter);
mutex_unlock(&c->ec_stripes_lock);
if (ret == -EINTR) if (ret == -EINTR)
goto retry; goto retry;
if (ret) if (ret)
return ret; return ret;
m = genradix_ptr(&c->ec_stripes, iter.pos.offset);
atomic_inc(&m->pin);
stripe->k.p = iter.pos; stripe->k.p = iter.pos;
ret = bch2_btree_insert_at(c, NULL, NULL, ret = bch2_btree_insert_at(c, NULL, NULL,
...@@ -775,14 +778,9 @@ static int ec_stripe_bkey_insert(struct bch_fs *c, ...@@ -775,14 +778,9 @@ static int ec_stripe_bkey_insert(struct bch_fs *c,
BTREE_INSERT_ENTRY(&iter, &stripe->k_i)); BTREE_INSERT_ENTRY(&iter, &stripe->k_i));
bch2_btree_iter_unlock(&iter); bch2_btree_iter_unlock(&iter);
if (ret)
atomic_dec(&m->pin);
return ret; return ret;
} }
/* stripe creation: */
static void extent_stripe_ptr_add(struct bkey_s_extent e, static void extent_stripe_ptr_add(struct bkey_s_extent e,
struct ec_stripe_buf *s, struct ec_stripe_buf *s,
struct bch_extent_ptr *ptr, struct bch_extent_ptr *ptr,
...@@ -858,7 +856,6 @@ static int ec_stripe_update_ptrs(struct bch_fs *c, ...@@ -858,7 +856,6 @@ static int ec_stripe_update_ptrs(struct bch_fs *c,
*/ */
static void ec_stripe_create(struct ec_stripe_new *s) static void ec_stripe_create(struct ec_stripe_new *s)
{ {
struct ec_stripe *ec_stripe;
struct bch_fs *c = s->c; struct bch_fs *c = s->c;
struct open_bucket *ob; struct open_bucket *ob;
struct bkey_i *k; struct bkey_i *k;
...@@ -898,10 +895,12 @@ static void ec_stripe_create(struct ec_stripe_new *s) ...@@ -898,10 +895,12 @@ static void ec_stripe_create(struct ec_stripe_new *s)
goto err_put_writes; goto err_put_writes;
} }
mutex_lock(&c->ec_stripe_create_lock);
ret = ec_stripe_bkey_insert(c, &s->stripe.key); ret = ec_stripe_bkey_insert(c, &s->stripe.key);
if (ret) { if (ret) {
bch_err(c, "error creating stripe: error creating stripe key"); bch_err(c, "error creating stripe: error creating stripe key");
goto err_put_writes; goto err_unlock;
} }
for_each_keylist_key(&s->keys, k) { for_each_keylist_key(&s->keys, k) {
...@@ -910,12 +909,8 @@ static void ec_stripe_create(struct ec_stripe_new *s) ...@@ -910,12 +909,8 @@ static void ec_stripe_create(struct ec_stripe_new *s)
break; break;
} }
ec_stripe = genradix_ptr(&c->ec_stripes, s->stripe.key.k.p.offset); err_unlock:
mutex_unlock(&c->ec_stripe_create_lock);
atomic_dec(&ec_stripe->pin);
bch2_stripes_heap_update(c, ec_stripe,
s->stripe.key.k.p.offset);
err_put_writes: err_put_writes:
percpu_ref_put(&c->writes); percpu_ref_put(&c->writes);
err: err:
...@@ -1222,7 +1217,7 @@ void bch2_ec_stop_dev(struct bch_fs *c, struct bch_dev *ca) ...@@ -1222,7 +1217,7 @@ void bch2_ec_stop_dev(struct bch_fs *c, struct bch_dev *ca)
mutex_unlock(&c->ec_new_stripe_lock); mutex_unlock(&c->ec_new_stripe_lock);
} }
int bch2_fs_ec_start(struct bch_fs *c) int bch2_ec_mem_alloc(struct bch_fs *c, bool gc)
{ {
struct btree_iter iter; struct btree_iter iter;
struct bkey_s_c k; struct bkey_s_c k;
...@@ -1238,19 +1233,25 @@ int bch2_fs_ec_start(struct bch_fs *c) ...@@ -1238,19 +1233,25 @@ int bch2_fs_ec_start(struct bch_fs *c)
if (ret) if (ret)
return ret; return ret;
if (!init_heap(&c->ec_stripes_heap, roundup_pow_of_two(idx), if (!gc &&
!init_heap(&c->ec_stripes_heap, roundup_pow_of_two(idx),
GFP_KERNEL)) GFP_KERNEL))
return -ENOMEM; return -ENOMEM;
#if 0 #if 0
ret = genradix_prealloc(&c->ec_stripes, idx, GFP_KERNEL); ret = genradix_prealloc(&c->stripes[gc], idx, GFP_KERNEL);
#else #else
for (i = 0; i < idx; i++) for (i = 0; i < idx; i++)
if (!genradix_ptr_alloc(&c->ec_stripes, i, GFP_KERNEL)) if (!genradix_ptr_alloc(&c->stripes[gc], i, GFP_KERNEL))
return -ENOMEM; return -ENOMEM;
#endif #endif
return 0; return 0;
} }
int bch2_fs_ec_start(struct bch_fs *c)
{
return bch2_ec_mem_alloc(c, false);
}
void bch2_fs_ec_exit(struct bch_fs *c) void bch2_fs_ec_exit(struct bch_fs *c)
{ {
struct ec_stripe_head *h; struct ec_stripe_head *h;
...@@ -1271,7 +1272,7 @@ void bch2_fs_ec_exit(struct bch_fs *c) ...@@ -1271,7 +1272,7 @@ void bch2_fs_ec_exit(struct bch_fs *c)
} }
free_heap(&c->ec_stripes_heap); free_heap(&c->ec_stripes_heap);
genradix_free(&c->ec_stripes); genradix_free(&c->stripes[0]);
bioset_exit(&c->ec_bioset); bioset_exit(&c->ec_bioset);
} }
......
...@@ -93,14 +93,16 @@ void bch2_ec_stripe_head_put(struct ec_stripe_head *); ...@@ -93,14 +93,16 @@ void bch2_ec_stripe_head_put(struct ec_stripe_head *);
struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *, unsigned, struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *, unsigned,
unsigned, unsigned); unsigned, unsigned);
void bch2_stripes_heap_update(struct bch_fs *, struct ec_stripe *, size_t); void bch2_stripes_heap_update(struct bch_fs *, struct stripe *, size_t);
void bch2_stripes_heap_del(struct bch_fs *, struct ec_stripe *, size_t); void bch2_stripes_heap_del(struct bch_fs *, struct stripe *, size_t);
void bch2_stripes_heap_insert(struct bch_fs *, struct ec_stripe *, size_t); void bch2_stripes_heap_insert(struct bch_fs *, struct stripe *, size_t);
void bch2_ec_stop_dev(struct bch_fs *, struct bch_dev *); void bch2_ec_stop_dev(struct bch_fs *, struct bch_dev *);
void bch2_ec_flush_new_stripes(struct bch_fs *); void bch2_ec_flush_new_stripes(struct bch_fs *);
int bch2_ec_mem_alloc(struct bch_fs *, bool);
int bch2_fs_ec_start(struct bch_fs *); int bch2_fs_ec_start(struct bch_fs *);
void bch2_fs_ec_exit(struct bch_fs *); void bch2_fs_ec_exit(struct bch_fs *);
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
#define EC_STRIPE_MAX 16 #define EC_STRIPE_MAX 16
struct ec_stripe { struct stripe {
size_t heap_idx; size_t heap_idx;
u16 sectors; u16 sectors;
...@@ -16,7 +16,6 @@ struct ec_stripe { ...@@ -16,7 +16,6 @@ struct ec_stripe {
u8 nr_redundant; u8 nr_redundant;
u8 alive; u8 alive;
atomic_t pin;
atomic_t blocks_nonempty; atomic_t blocks_nonempty;
atomic_t block_sectors[EC_STRIPE_MAX]; atomic_t block_sectors[EC_STRIPE_MAX];
}; };
......
...@@ -208,8 +208,8 @@ static unsigned bch2_extent_ptr_durability(struct bch_fs *c, ...@@ -208,8 +208,8 @@ static unsigned bch2_extent_ptr_durability(struct bch_fs *c,
durability = max_t(unsigned, durability, ca->mi.durability); durability = max_t(unsigned, durability, ca->mi.durability);
for (i = 0; i < p.ec_nr; i++) { for (i = 0; i < p.ec_nr; i++) {
struct ec_stripe *s = struct stripe *s =
genradix_ptr(&c->ec_stripes, p.idx); genradix_ptr(&c->stripes[0], p.idx);
if (WARN_ON(!s)) if (WARN_ON(!s))
continue; continue;
......
...@@ -700,22 +700,19 @@ static int bch2_get_next_quota(struct super_block *sb, struct kqid *kqid, ...@@ -700,22 +700,19 @@ static int bch2_get_next_quota(struct super_block *sb, struct kqid *kqid,
struct bch_fs *c = sb->s_fs_info; struct bch_fs *c = sb->s_fs_info;
struct bch_memquota_type *q = &c->quotas[kqid->type]; struct bch_memquota_type *q = &c->quotas[kqid->type];
qid_t qid = from_kqid(&init_user_ns, *kqid); qid_t qid = from_kqid(&init_user_ns, *kqid);
struct genradix_iter iter = genradix_iter_init(&q->table, qid); struct genradix_iter iter;
struct bch_memquota *mq; struct bch_memquota *mq;
int ret = 0; int ret = 0;
mutex_lock(&q->lock); mutex_lock(&q->lock);
while ((mq = genradix_iter_peek(&iter, &q->table))) { genradix_for_each_from(&q->table, iter, mq, qid)
if (memcmp(mq, page_address(ZERO_PAGE(0)), sizeof(*mq))) { if (memcmp(mq, page_address(ZERO_PAGE(0)), sizeof(*mq))) {
__bch2_quota_get(qdq, mq); __bch2_quota_get(qdq, mq);
*kqid = make_kqid(current_user_ns(), kqid->type, iter.pos); *kqid = make_kqid(current_user_ns(), kqid->type, iter.pos);
goto found; goto found;
} }
genradix_iter_advance(&iter, &q->table);
}
ret = -ENOENT; ret = -ENOENT;
found: found:
mutex_unlock(&q->lock); mutex_unlock(&q->lock);
......
...@@ -305,6 +305,9 @@ int bch2_fs_initialize(struct bch_fs *c) ...@@ -305,6 +305,9 @@ int bch2_fs_initialize(struct bch_fs *c)
set_bit(BCH_FS_ALLOC_READ_DONE, &c->flags); set_bit(BCH_FS_ALLOC_READ_DONE, &c->flags);
for (i = 0; i < BTREE_ID_NR; i++)
bch2_btree_root_alloc(c, i);
ret = bch2_initial_gc(c, &journal); ret = bch2_initial_gc(c, &journal);
if (ret) if (ret)
goto err; goto err;
...@@ -316,9 +319,6 @@ int bch2_fs_initialize(struct bch_fs *c) ...@@ -316,9 +319,6 @@ int bch2_fs_initialize(struct bch_fs *c)
goto err; goto err;
} }
for (i = 0; i < BTREE_ID_NR; i++)
bch2_btree_root_alloc(c, i);
/* /*
* journal_res_get() will crash if called before this has * journal_res_get() will crash if called before this has
* set up the journal.pin FIFO and journal.cur pointer: * set up the journal.pin FIFO and journal.cur pointer:
......
...@@ -548,7 +548,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) ...@@ -548,7 +548,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
INIT_LIST_HEAD(&c->ec_new_stripe_list); INIT_LIST_HEAD(&c->ec_new_stripe_list);
mutex_init(&c->ec_new_stripe_lock); mutex_init(&c->ec_new_stripe_lock);
mutex_init(&c->ec_stripes_lock); mutex_init(&c->ec_stripe_create_lock);
spin_lock_init(&c->ec_stripes_heap_lock); spin_lock_init(&c->ec_stripes_heap_lock);
seqcount_init(&c->gc_pos_lock); seqcount_init(&c->gc_pos_lock);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment