Commit 9ca53b55 authored by Kent Overstreet, committed by Kent Overstreet

bcachefs: gc now operates on second set of bucket marks

This means we can now use gc to verify the allocation information -
important for testing persistent alloc info
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent e6473691
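
For orientation before the diff: the core change is that each device (and the filesystem as a whole) now carries two parallel sets of bucket marks and usage counters, indexed by a boolean - slot 0 is the live set updated on the fly, slot 1 is the set gc accumulates into, and bch2_gc_done() compares the two when gc finishes. Below is a minimal standalone sketch of that indexing pattern; the *_sketch names are illustrative only and not part of the bcachefs API.

/*
 * Minimal sketch (not bcachefs code): two sets of bucket marks selected
 * by a 'gc' flag, allocated when gc starts and reconciled when it ends.
 */
#include <stdbool.h>
#include <stdlib.h>

struct bucket_mark_sketch {
	unsigned char	gen;
	unsigned	dirty_sectors;
	unsigned	cached_sectors;
};

struct dev_sketch {
	size_t				nbuckets;
	/* [0] = live marks, [1] = marks being rebuilt by gc */
	struct bucket_mark_sketch	*buckets[2];
};

/* In the spirit of __bucket_array()/__bucket(): pick a set by 'gc' */
static struct bucket_mark_sketch *
sketch_bucket(struct dev_sketch *ca, size_t b, bool gc)
{
	return &ca->buckets[gc][b];
}

/* gc start: allocate a zeroed second set alongside the live one */
static int sketch_gc_start(struct dev_sketch *ca)
{
	ca->buckets[1] = calloc(ca->nbuckets, sizeof(*ca->buckets[1]));
	return ca->buckets[1] ? 0 : -1;
}

/* gc done: adopt whatever gc recomputed differently, then drop set 1 */
static void sketch_gc_done(struct dev_sketch *ca)
{
	for (size_t b = 0; b < ca->nbuckets; b++) {
		struct bucket_mark_sketch *dst = sketch_bucket(ca, b, false);
		struct bucket_mark_sketch *src = sketch_bucket(ca, b, true);

		/* mirrors copy_bucket_field(): compare and fix per field */
		if (dst->gen != src->gen)
			dst->gen = src->gen;
		if (dst->dirty_sectors != src->dirty_sectors)
			dst->dirty_sectors = src->dirty_sectors;
		if (dst->cached_sectors != src->cached_sectors)
			dst->cached_sectors = src->cached_sectors;
	}

	free(ca->buckets[1]);
	ca->buckets[1] = NULL;
}
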
...@@ -930,12 +930,6 @@ static int bch2_allocator_thread(void *arg) ...@@ -930,12 +930,6 @@ static int bch2_allocator_thread(void *arg)
pr_debug("free_inc now empty"); pr_debug("free_inc now empty");
do { do {
if (test_bit(BCH_FS_GC_FAILURE, &c->flags)) {
up_read(&c->gc_lock);
bch_err(ca, "gc failure");
goto stop;
}
/* /*
* Find some buckets that we can invalidate, either * Find some buckets that we can invalidate, either
* they're completely unused, or only contain clean data * they're completely unused, or only contain clean data
...@@ -1293,9 +1287,6 @@ static int __bch2_fs_allocator_start(struct bch_fs *c) ...@@ -1293,9 +1287,6 @@ static int __bch2_fs_allocator_start(struct bch_fs *c)
bool invalidating_data = false; bool invalidating_data = false;
int ret = 0; int ret = 0;
if (test_bit(BCH_FS_GC_FAILURE, &c->flags))
return -1;
if (test_alloc_startup(c)) { if (test_alloc_startup(c)) {
invalidating_data = true; invalidating_data = true;
goto not_enough; goto not_enough;
...@@ -1321,9 +1312,7 @@ static int __bch2_fs_allocator_start(struct bch_fs *c) ...@@ -1321,9 +1312,7 @@ static int __bch2_fs_allocator_start(struct bch_fs *c)
continue; continue;
bch2_mark_alloc_bucket(c, ca, bu, true, bch2_mark_alloc_bucket(c, ca, bu, true,
gc_pos_alloc(c, NULL), gc_pos_alloc(c, NULL), 0);
BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE|
BCH_BUCKET_MARK_GC_LOCK_HELD);
fifo_push(&ca->free_inc, bu); fifo_push(&ca->free_inc, bu);
......
...@@ -347,7 +347,6 @@ enum gc_phase { ...@@ -347,7 +347,6 @@ enum gc_phase {
GC_PHASE_PENDING_DELETE, GC_PHASE_PENDING_DELETE,
GC_PHASE_ALLOC, GC_PHASE_ALLOC,
GC_PHASE_DONE
}; };
struct gc_pos { struct gc_pos {
...@@ -392,15 +391,14 @@ struct bch_dev { ...@@ -392,15 +391,14 @@ struct bch_dev {
* gc_lock, for device resize - holding any is sufficient for access: * gc_lock, for device resize - holding any is sufficient for access:
* Or rcu_read_lock(), but only for ptr_stale(): * Or rcu_read_lock(), but only for ptr_stale():
*/ */
struct bucket_array __rcu *buckets; struct bucket_array __rcu *buckets[2];
unsigned long *buckets_dirty; unsigned long *buckets_dirty;
unsigned long *buckets_written; unsigned long *buckets_written;
/* most out of date gen in the btree */ /* most out of date gen in the btree */
u8 *oldest_gens; u8 *oldest_gens;
struct rw_semaphore bucket_lock; struct rw_semaphore bucket_lock;
struct bch_dev_usage __percpu *usage_percpu; struct bch_dev_usage __percpu *usage[2];
struct bch_dev_usage usage_cached;
/* Allocator: */ /* Allocator: */
struct task_struct __rcu *alloc_thread; struct task_struct __rcu *alloc_thread;
...@@ -478,7 +476,6 @@ enum { ...@@ -478,7 +476,6 @@ enum {
/* errors: */ /* errors: */
BCH_FS_ERROR, BCH_FS_ERROR,
BCH_FS_GC_FAILURE,
/* misc: */ /* misc: */
BCH_FS_BDEV_MOUNTED, BCH_FS_BDEV_MOUNTED,
...@@ -614,8 +611,8 @@ struct bch_fs { ...@@ -614,8 +611,8 @@ struct bch_fs {
atomic64_t sectors_available; atomic64_t sectors_available;
struct bch_fs_usage __percpu *usage_percpu; struct bch_fs_usage __percpu *usage[2];
struct bch_fs_usage usage_cached;
struct percpu_rw_semaphore usage_lock; struct percpu_rw_semaphore usage_lock;
struct closure_waitlist freelist_wait; struct closure_waitlist freelist_wait;
...@@ -656,9 +653,6 @@ struct bch_fs { ...@@ -656,9 +653,6 @@ struct bch_fs {
* *
* gc_cur_phase is a superset of btree_ids (BTREE_ID_EXTENTS etc.) * gc_cur_phase is a superset of btree_ids (BTREE_ID_EXTENTS etc.)
* *
* gc_cur_phase == GC_PHASE_DONE indicates that gc is finished/not
* currently running, and gc marks are currently valid
*
* Protected by gc_pos_lock. Only written to by GC thread, so GC thread * Protected by gc_pos_lock. Only written to by GC thread, so GC thread
* can read without a lock. * can read without a lock.
*/ */
......
...@@ -260,8 +260,7 @@ static int bch2_gc_mark_key(struct bch_fs *c, enum bkey_type type, ...@@ -260,8 +260,7 @@ static int bch2_gc_mark_key(struct bch_fs *c, enum bkey_type type,
{ {
struct gc_pos pos = { 0 }; struct gc_pos pos = { 0 };
unsigned flags = unsigned flags =
BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE| BCH_BUCKET_MARK_GC|
BCH_BUCKET_MARK_GC_LOCK_HELD|
(initial ? BCH_BUCKET_MARK_NOATOMIC : 0); (initial ? BCH_BUCKET_MARK_NOATOMIC : 0);
int ret = 0; int ret = 0;
...@@ -484,9 +483,6 @@ void bch2_mark_dev_superblock(struct bch_fs *c, struct bch_dev *ca, ...@@ -484,9 +483,6 @@ void bch2_mark_dev_superblock(struct bch_fs *c, struct bch_dev *ca,
BCH_DATA_SB, flags); BCH_DATA_SB, flags);
} }
if (c)
spin_lock(&c->journal.lock);
for (i = 0; i < ca->journal.nr; i++) { for (i = 0; i < ca->journal.nr; i++) {
b = ca->journal.buckets[i]; b = ca->journal.buckets[i];
bch2_mark_metadata_bucket(c, ca, b, BCH_DATA_JOURNAL, bch2_mark_metadata_bucket(c, ca, b, BCH_DATA_JOURNAL,
...@@ -495,7 +491,6 @@ void bch2_mark_dev_superblock(struct bch_fs *c, struct bch_dev *ca, ...@@ -495,7 +491,6 @@ void bch2_mark_dev_superblock(struct bch_fs *c, struct bch_dev *ca,
} }
if (c) { if (c) {
spin_unlock(&c->journal.lock);
percpu_up_read(&c->usage_lock); percpu_up_read(&c->usage_lock);
} else { } else {
preempt_enable(); preempt_enable();
...@@ -511,9 +506,7 @@ static void bch2_mark_superblocks(struct bch_fs *c) ...@@ -511,9 +506,7 @@ static void bch2_mark_superblocks(struct bch_fs *c)
gc_pos_set(c, gc_phase(GC_PHASE_SB)); gc_pos_set(c, gc_phase(GC_PHASE_SB));
for_each_online_member(ca, c, i) for_each_online_member(ca, c, i)
bch2_mark_dev_superblock(c, ca, bch2_mark_dev_superblock(c, ca, BCH_BUCKET_MARK_GC);
BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE|
BCH_BUCKET_MARK_GC_LOCK_HELD);
mutex_unlock(&c->sb_lock); mutex_unlock(&c->sb_lock);
} }
...@@ -521,7 +514,6 @@ static void bch2_mark_superblocks(struct bch_fs *c) ...@@ -521,7 +514,6 @@ static void bch2_mark_superblocks(struct bch_fs *c)
static void bch2_mark_pending_btree_node_frees(struct bch_fs *c) static void bch2_mark_pending_btree_node_frees(struct bch_fs *c)
{ {
struct gc_pos pos = { 0 }; struct gc_pos pos = { 0 };
struct bch_fs_usage stats = { 0 };
struct btree_update *as; struct btree_update *as;
struct pending_btree_node_free *d; struct pending_btree_node_free *d;
...@@ -533,13 +525,8 @@ static void bch2_mark_pending_btree_node_frees(struct bch_fs *c) ...@@ -533,13 +525,8 @@ static void bch2_mark_pending_btree_node_frees(struct bch_fs *c)
bch2_mark_key(c, BKEY_TYPE_BTREE, bch2_mark_key(c, BKEY_TYPE_BTREE,
bkey_i_to_s_c(&d->key), bkey_i_to_s_c(&d->key),
true, 0, true, 0,
pos, &stats, 0, pos, NULL, 0,
BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE| BCH_BUCKET_MARK_GC);
BCH_BUCKET_MARK_GC_LOCK_HELD);
/*
* Don't apply stats - pending deletes aren't tracked in
* bch_alloc_stats:
*/
mutex_unlock(&c->btree_interior_update_lock); mutex_unlock(&c->btree_interior_update_lock);
} }
...@@ -560,8 +547,7 @@ static void bch2_mark_allocator_buckets(struct bch_fs *c) ...@@ -560,8 +547,7 @@ static void bch2_mark_allocator_buckets(struct bch_fs *c)
fifo_for_each_entry(i, &ca->free_inc, iter) fifo_for_each_entry(i, &ca->free_inc, iter)
bch2_mark_alloc_bucket(c, ca, i, true, bch2_mark_alloc_bucket(c, ca, i, true,
gc_pos_alloc(c, NULL), gc_pos_alloc(c, NULL),
BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE| BCH_BUCKET_MARK_GC);
BCH_BUCKET_MARK_GC_LOCK_HELD);
...@@ -569,8 +555,7 @@ static void bch2_mark_allocator_buckets(struct bch_fs *c) ...@@ -569,8 +555,7 @@ static void bch2_mark_allocator_buckets(struct bch_fs *c)
fifo_for_each_entry(i, &ca->free[j], iter) fifo_for_each_entry(i, &ca->free[j], iter)
bch2_mark_alloc_bucket(c, ca, i, true, bch2_mark_alloc_bucket(c, ca, i, true,
gc_pos_alloc(c, NULL), gc_pos_alloc(c, NULL),
BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE| BCH_BUCKET_MARK_GC);
BCH_BUCKET_MARK_GC_LOCK_HELD);
} }
spin_unlock(&c->freelist_lock); spin_unlock(&c->freelist_lock);
...@@ -584,8 +569,7 @@ static void bch2_mark_allocator_buckets(struct bch_fs *c) ...@@ -584,8 +569,7 @@ static void bch2_mark_allocator_buckets(struct bch_fs *c)
ca = bch_dev_bkey_exists(c, ob->ptr.dev); ca = bch_dev_bkey_exists(c, ob->ptr.dev);
bch2_mark_alloc_bucket(c, ca, PTR_BUCKET_NR(ca, &ob->ptr), true, bch2_mark_alloc_bucket(c, ca, PTR_BUCKET_NR(ca, &ob->ptr), true,
gc_pos_alloc(c, ob), gc_pos_alloc(c, ob),
BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE| BCH_BUCKET_MARK_GC);
BCH_BUCKET_MARK_GC_LOCK_HELD);
} }
spin_unlock(&ob->lock); spin_unlock(&ob->lock);
} }
...@@ -593,122 +577,310 @@ static void bch2_mark_allocator_buckets(struct bch_fs *c) ...@@ -593,122 +577,310 @@ static void bch2_mark_allocator_buckets(struct bch_fs *c)
percpu_up_read(&c->usage_lock); percpu_up_read(&c->usage_lock);
} }
static void bch2_gc_start(struct bch_fs *c) static void bch2_gc_free(struct bch_fs *c)
{
struct bch_dev *ca;
unsigned i;
for_each_member_device(ca, c, i) {
kvpfree(rcu_dereference_protected(ca->buckets[1], 1),
sizeof(struct bucket_array) +
ca->mi.nbuckets * sizeof(struct bucket));
ca->buckets[1] = NULL;
free_percpu(ca->usage[1]);
ca->usage[1] = NULL;
}
free_percpu(c->usage[1]);
c->usage[1] = NULL;
}
static void bch2_gc_done_nocheck(struct bch_fs *c)
{ {
struct bch_dev *ca; struct bch_dev *ca;
struct bucket_array *buckets;
struct bucket_mark new;
unsigned i; unsigned i;
size_t b;
int cpu; int cpu;
percpu_down_write(&c->usage_lock); for_each_member_device(ca, c, i) {
struct bucket_array *src = __bucket_array(ca, 1);
/* memcpy(__bucket_array(ca, 0), src,
* Indicates to buckets code that gc is now in progress - done under sizeof(struct bucket_array) +
* usage_lock to avoid racing with bch2_mark_key(): sizeof(struct bucket) * src->nbuckets);
*/ };
__gc_pos_set(c, gc_phase(GC_PHASE_START));
/* Save a copy of the existing bucket stats while we recompute them: */
for_each_member_device(ca, c, i) { for_each_member_device(ca, c, i) {
ca->usage_cached = __bch2_dev_usage_read(ca); struct bch_dev_usage *p;
for_each_possible_cpu(cpu) { for_each_possible_cpu(cpu) {
struct bch_dev_usage *p = p = per_cpu_ptr(ca->usage[0], cpu);
per_cpu_ptr(ca->usage_percpu, cpu);
memset(p, 0, sizeof(*p)); memset(p, 0, sizeof(*p));
} }
preempt_disable();
*this_cpu_ptr(ca->usage[0]) = __bch2_dev_usage_read(ca, 1);
preempt_enable();
} }
c->usage_cached = __bch2_fs_usage_read(c); {
for_each_possible_cpu(cpu) { struct bch_fs_usage src = __bch2_fs_usage_read(c, 1);
struct bch_fs_usage *p = struct bch_fs_usage *p;
per_cpu_ptr(c->usage_percpu, cpu);
memset(p->replicas, 0, sizeof(p->replicas)); for_each_possible_cpu(cpu) {
memset(p->buckets, 0, sizeof(p->buckets)); p = per_cpu_ptr(c->usage[0], cpu);
memset(p, 0, offsetof(typeof(*p), online_reserved));
}
preempt_disable();
memcpy(this_cpu_ptr(c->usage[0]),
&src,
offsetof(typeof(*p), online_reserved));
preempt_enable();
} }
}
static void bch2_gc_done(struct bch_fs *c, bool initial)
{
struct bch_dev *ca;
unsigned i;
int cpu;
#define copy_field(_f, _msg, ...) \
if (dst._f != src._f) { \
pr_info(_msg ": got %llu, should be %llu, fixing" \
, ##__VA_ARGS__, dst._f, src._f); \
dst._f = src._f; \
}
#define copy_bucket_field(_f) \
if (dst->b[b].mark._f != src->b[b].mark._f) { \
pr_info("dev %u bucket %zu has wrong " #_f \
": got %u, should be %u, fixing", \
i, b, dst->b[b].mark._f, src->b[b].mark._f); \
dst->b[b]._mark._f = src->b[b].mark._f; \
}
#define copy_dev_field(_f, _msg, ...) \
copy_field(_f, "dev %u has wrong " _msg, i, ##__VA_ARGS__)
#define copy_fs_field(_f, _msg, ...) \
copy_field(_f, "fs has wrong " _msg, ##__VA_ARGS__)
percpu_down_write(&c->usage_lock);
if (initial) {
bch2_gc_done_nocheck(c);
goto out;
}
for_each_member_device(ca, c, i) {
struct bucket_array *dst = __bucket_array(ca, 0);
struct bucket_array *src = __bucket_array(ca, 1);
size_t b;
if (initial) {
memcpy(dst, src,
sizeof(struct bucket_array) +
sizeof(struct bucket) * dst->nbuckets);
}
for (b = 0; b < src->nbuckets; b++) {
copy_bucket_field(gen);
copy_bucket_field(data_type);
copy_bucket_field(owned_by_allocator);
copy_bucket_field(stripe);
copy_bucket_field(dirty_sectors);
copy_bucket_field(cached_sectors);
}
};
for_each_member_device(ca, c, i) {
struct bch_dev_usage dst = __bch2_dev_usage_read(ca, 0);
struct bch_dev_usage src = __bch2_dev_usage_read(ca, 1);
struct bch_dev_usage *p;
unsigned b;
for (b = 0; b < BCH_DATA_NR; b++)
copy_dev_field(buckets[b],
"buckets[%s]", bch2_data_types[b]);
copy_dev_field(buckets_alloc, "buckets_alloc");
copy_dev_field(buckets_ec, "buckets_ec");
for (b = 0; b < BCH_DATA_NR; b++)
copy_dev_field(sectors[b],
"sectors[%s]", bch2_data_types[b]);
copy_dev_field(sectors_fragmented,
"sectors_fragmented");
for_each_possible_cpu(cpu) {
p = per_cpu_ptr(ca->usage[0], cpu);
memset(p, 0, sizeof(*p));
}
preempt_disable();
p = this_cpu_ptr(ca->usage[0]);
*p = dst;
preempt_enable();
}
{
struct bch_fs_usage dst = __bch2_fs_usage_read(c, 0);
struct bch_fs_usage src = __bch2_fs_usage_read(c, 1);
struct bch_fs_usage *p;
unsigned r, b;
for (r = 0; r < BCH_REPLICAS_MAX; r++) {
for (b = 0; b < BCH_DATA_NR; b++)
copy_fs_field(replicas[r].data[b],
"replicas[%i].data[%s]",
r, bch2_data_types[b]);
copy_fs_field(replicas[r].ec_data,
"replicas[%i].ec_data", r);
copy_fs_field(replicas[r].persistent_reserved,
"replicas[%i].persistent_reserved", r);
}
for (b = 0; b < BCH_DATA_NR; b++)
copy_fs_field(buckets[b],
"buckets[%s]", bch2_data_types[b]);
for_each_possible_cpu(cpu) {
p = per_cpu_ptr(c->usage[0], cpu);
memset(p, 0, offsetof(typeof(*p), online_reserved));
}
preempt_disable();
p = this_cpu_ptr(c->usage[0]);
memcpy(p, &dst, offsetof(typeof(*p), online_reserved));
preempt_enable();
}
out:
percpu_up_write(&c->usage_lock); percpu_up_write(&c->usage_lock);
/* Clear bucket marks: */ #undef copy_field
#undef copy_fs_field
#undef copy_dev_field
#undef copy_bucket_field
}
static int bch2_gc_start(struct bch_fs *c)
{
struct bch_dev *ca;
unsigned i;
BUG_ON(c->usage[1]);
c->usage[1] = alloc_percpu(struct bch_fs_usage);
if (!c->usage[1])
return -ENOMEM;
for_each_member_device(ca, c, i) { for_each_member_device(ca, c, i) {
down_read(&ca->bucket_lock); BUG_ON(ca->buckets[1]);
buckets = bucket_array(ca); BUG_ON(ca->usage[1]);
for (b = buckets->first_bucket; b < buckets->nbuckets; b++) { ca->buckets[1] = kvpmalloc(sizeof(struct bucket_array) +
bucket_cmpxchg(buckets->b + b, new, ({ ca->mi.nbuckets * sizeof(struct bucket),
new.owned_by_allocator = 0; GFP_KERNEL|__GFP_ZERO);
new.data_type = 0; if (!ca->buckets[1]) {
new.cached_sectors = 0; percpu_ref_put(&ca->ref);
new.dirty_sectors = 0; return -ENOMEM;
new.stripe = 0; }
}));
ca->oldest_gens[b] = new.gen; ca->usage[1] = alloc_percpu(struct bch_dev_usage);
if (!ca->usage[1]) {
percpu_ref_put(&ca->ref);
return -ENOMEM;
} }
up_read(&ca->bucket_lock);
} }
percpu_down_write(&c->usage_lock);
for_each_member_device(ca, c, i) {
struct bucket_array *dst = __bucket_array(ca, 1);
struct bucket_array *src = __bucket_array(ca, 0);
size_t b;
dst->first_bucket = src->first_bucket;
dst->nbuckets = src->nbuckets;
for (b = 0; b < src->nbuckets; b++)
dst->b[b]._mark.gen = src->b[b].mark.gen;
};
percpu_up_write(&c->usage_lock);
return 0;
} }
/** /**
* bch_gc - recompute bucket marks and oldest_gen, rewrite btree nodes * bch2_gc - walk _all_ references to buckets, and recompute them:
*
* Order matters here:
* - Concurrent GC relies on the fact that we have a total ordering for
* everything that GC walks - see gc_will_visit_node(),
* gc_will_visit_root()
*
* - also, references move around in the course of index updates and
* various other crap: everything needs to agree on the ordering
* references are allowed to move around in - e.g., we're allowed to
* start with a reference owned by an open_bucket (the allocator) and
* move it to the btree, but not the reverse.
*
* This is necessary to ensure that gc doesn't miss references that
* move around - if references move backwards in the ordering GC
* uses, GC could skip past them
*/ */
void bch2_gc(struct bch_fs *c) int bch2_gc(struct bch_fs *c, struct list_head *journal, bool initial)
{ {
struct bch_dev *ca; struct bch_dev *ca;
u64 start_time = local_clock(); u64 start_time = local_clock();
unsigned i; unsigned i, iter = 0;
int ret; int ret;
/*
* Walk _all_ references to buckets, and recompute them:
*
* Order matters here:
* - Concurrent GC relies on the fact that we have a total ordering for
* everything that GC walks - see gc_will_visit_node(),
* gc_will_visit_root()
*
* - also, references move around in the course of index updates and
* various other crap: everything needs to agree on the ordering
* references are allowed to move around in - e.g., we're allowed to
* start with a reference owned by an open_bucket (the allocator) and
* move it to the btree, but not the reverse.
*
* This is necessary to ensure that gc doesn't miss references that
* move around - if references move backwards in the ordering GC
* uses, GC could skip past them
*/
trace_gc_start(c); trace_gc_start(c);
/*
* Do this before taking gc_lock - bch2_disk_reservation_get() blocks on
* gc_lock if sectors_available goes to 0:
*/
bch2_recalc_sectors_available(c);
down_write(&c->gc_lock); down_write(&c->gc_lock);
if (test_bit(BCH_FS_GC_FAILURE, &c->flags)) again:
ret = bch2_gc_start(c);
if (ret)
goto out; goto out;
bch2_gc_start(c);
bch2_mark_superblocks(c); bch2_mark_superblocks(c);
ret = bch2_gc_btrees(c, NULL, false); ret = bch2_gc_btrees(c, journal, initial);
if (ret) { if (ret)
bch_err(c, "btree gc failed: %d", ret);
set_bit(BCH_FS_GC_FAILURE, &c->flags);
goto out; goto out;
}
bch2_mark_pending_btree_node_frees(c); bch2_mark_pending_btree_node_frees(c);
bch2_mark_allocator_buckets(c); bch2_mark_allocator_buckets(c);
/* Indicates that gc is no longer in progress: */
gc_pos_set(c, gc_phase(GC_PHASE_DONE));
c->gc_count++; c->gc_count++;
out: out:
if (!ret && test_bit(BCH_FS_FIXED_GENS, &c->flags)) {
/*
* XXX: make sure gens we fixed got saved
*/
if (iter++ <= 2) {
bch_info(c, "Fixed gens, restarting mark and sweep:");
clear_bit(BCH_FS_FIXED_GENS, &c->flags);
goto again;
}
bch_info(c, "Unable to fix bucket gens, looping");
ret = -EINVAL;
}
if (!ret)
bch2_gc_done(c, initial);
/* Indicates that gc is no longer in progress: */
__gc_pos_set(c, gc_phase(GC_PHASE_START));
bch2_gc_free(c);
up_write(&c->gc_lock); up_write(&c->gc_lock);
if (!ret && initial)
set_bit(BCH_FS_INITIAL_GC_DONE, &c->flags);
trace_gc_end(c); trace_gc_end(c);
bch2_time_stats_update(&c->times[BCH_TIME_btree_gc], start_time); bch2_time_stats_update(&c->times[BCH_TIME_btree_gc], start_time);
...@@ -724,6 +896,7 @@ void bch2_gc(struct bch_fs *c) ...@@ -724,6 +896,7 @@ void bch2_gc(struct bch_fs *c)
* allocator thread - issue wakeup in case they blocked on gc_lock: * allocator thread - issue wakeup in case they blocked on gc_lock:
*/ */
closure_wake_up(&c->freelist_wait); closure_wake_up(&c->freelist_wait);
return ret;
} }
/* Btree coalescing */ /* Btree coalescing */
...@@ -1039,9 +1212,6 @@ void bch2_coalesce(struct bch_fs *c) ...@@ -1039,9 +1212,6 @@ void bch2_coalesce(struct bch_fs *c)
{ {
enum btree_id id; enum btree_id id;
if (test_bit(BCH_FS_GC_FAILURE, &c->flags))
return;
down_read(&c->gc_lock); down_read(&c->gc_lock);
trace_gc_coalesce_start(c); trace_gc_coalesce_start(c);
...@@ -1053,7 +1223,6 @@ void bch2_coalesce(struct bch_fs *c) ...@@ -1053,7 +1223,6 @@ void bch2_coalesce(struct bch_fs *c)
if (ret) { if (ret) {
if (ret != -ESHUTDOWN) if (ret != -ESHUTDOWN)
bch_err(c, "btree coalescing failed: %d", ret); bch_err(c, "btree coalescing failed: %d", ret);
set_bit(BCH_FS_GC_FAILURE, &c->flags);
return; return;
} }
} }
...@@ -1068,6 +1237,7 @@ static int bch2_gc_thread(void *arg) ...@@ -1068,6 +1237,7 @@ static int bch2_gc_thread(void *arg)
struct io_clock *clock = &c->io_clock[WRITE]; struct io_clock *clock = &c->io_clock[WRITE];
unsigned long last = atomic_long_read(&clock->now); unsigned long last = atomic_long_read(&clock->now);
unsigned last_kick = atomic_read(&c->kick_gc); unsigned last_kick = atomic_read(&c->kick_gc);
int ret;
set_freezable(); set_freezable();
...@@ -1101,7 +1271,9 @@ static int bch2_gc_thread(void *arg) ...@@ -1101,7 +1271,9 @@ static int bch2_gc_thread(void *arg)
last = atomic_long_read(&clock->now); last = atomic_long_read(&clock->now);
last_kick = atomic_read(&c->kick_gc); last_kick = atomic_read(&c->kick_gc);
bch2_gc(c); ret = bch2_gc(c, NULL, false);
if (ret)
bch_err(c, "btree gc failed: %i", ret);
debug_check_no_locks_held(); debug_check_no_locks_held();
} }
...@@ -1142,30 +1314,7 @@ int bch2_gc_thread_start(struct bch_fs *c) ...@@ -1142,30 +1314,7 @@ int bch2_gc_thread_start(struct bch_fs *c)
int bch2_initial_gc(struct bch_fs *c, struct list_head *journal) int bch2_initial_gc(struct bch_fs *c, struct list_head *journal)
{ {
unsigned iter = 0; int ret = bch2_gc(c, journal, true);
int ret = 0;
down_write(&c->gc_lock);
again:
bch2_gc_start(c);
bch2_mark_superblocks(c);
ret = bch2_gc_btrees(c, journal, true);
if (ret)
goto err;
if (test_bit(BCH_FS_FIXED_GENS, &c->flags)) {
if (iter++ > 2) {
bch_info(c, "Unable to fix bucket gens, looping");
ret = -EINVAL;
goto err;
}
bch_info(c, "Fixed gens, restarting initial mark and sweep:");
clear_bit(BCH_FS_FIXED_GENS, &c->flags);
goto again;
}
/* /*
* Skip past versions that might have possibly been used (as nonces), * Skip past versions that might have possibly been used (as nonces),
...@@ -1174,9 +1323,5 @@ int bch2_initial_gc(struct bch_fs *c, struct list_head *journal) ...@@ -1174,9 +1323,5 @@ int bch2_initial_gc(struct bch_fs *c, struct list_head *journal)
if (c->sb.encryption_type) if (c->sb.encryption_type)
atomic64_add(1 << 16, &c->key_version); atomic64_add(1 << 16, &c->key_version);
gc_pos_set(c, gc_phase(GC_PHASE_DONE));
set_bit(BCH_FS_INITIAL_GC_DONE, &c->flags);
err:
up_write(&c->gc_lock);
return ret; return ret;
} }
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
enum bkey_type; enum bkey_type;
void bch2_coalesce(struct bch_fs *); void bch2_coalesce(struct bch_fs *);
void bch2_gc(struct bch_fs *); int bch2_gc(struct bch_fs *, struct list_head *, bool);
void bch2_gc_thread_stop(struct bch_fs *); void bch2_gc_thread_stop(struct bch_fs *);
int bch2_gc_thread_start(struct bch_fs *); int bch2_gc_thread_start(struct bch_fs *);
int bch2_initial_gc(struct bch_fs *, struct list_head *); int bch2_initial_gc(struct bch_fs *, struct list_head *);
...@@ -105,14 +105,14 @@ static inline struct gc_pos gc_pos_alloc(struct bch_fs *c, struct open_bucket *o ...@@ -105,14 +105,14 @@ static inline struct gc_pos gc_pos_alloc(struct bch_fs *c, struct open_bucket *o
}; };
} }
static inline bool gc_will_visit(struct bch_fs *c, struct gc_pos pos) static inline bool gc_visited(struct bch_fs *c, struct gc_pos pos)
{ {
unsigned seq; unsigned seq;
bool ret; bool ret;
do { do {
seq = read_seqcount_begin(&c->gc_pos_lock); seq = read_seqcount_begin(&c->gc_pos_lock);
ret = gc_pos_cmp(c->gc_pos, pos) < 0; ret = gc_pos_cmp(pos, c->gc_pos) <= 0;
} while (read_seqcount_retry(&c->gc_pos_lock, seq)); } while (read_seqcount_retry(&c->gc_pos_lock, seq));
return ret; return ret;
......
...@@ -160,7 +160,6 @@ static void bch2_btree_node_free_index(struct btree_update *as, struct btree *b, ...@@ -160,7 +160,6 @@ static void bch2_btree_node_free_index(struct btree_update *as, struct btree *b,
{ {
struct bch_fs *c = as->c; struct bch_fs *c = as->c;
struct pending_btree_node_free *d; struct pending_btree_node_free *d;
unsigned replicas;
/* /*
* btree_update lock is only needed here to avoid racing with * btree_update lock is only needed here to avoid racing with
...@@ -178,15 +177,6 @@ static void bch2_btree_node_free_index(struct btree_update *as, struct btree *b, ...@@ -178,15 +177,6 @@ static void bch2_btree_node_free_index(struct btree_update *as, struct btree *b,
BUG_ON(d->index_update_done); BUG_ON(d->index_update_done);
d->index_update_done = true; d->index_update_done = true;
/*
* Btree nodes are accounted as freed in bch_alloc_stats when they're
* freed from the index:
*/
replicas = bch2_extent_nr_dirty_ptrs(k);
if (replicas)
stats->replicas[replicas - 1].data[BCH_DATA_BTREE] -=
c->opts.btree_node_size * replicas;
/* /*
* We're dropping @k from the btree, but it's still live until the * We're dropping @k from the btree, but it's still live until the
* index update is persistent so we need to keep a reference around for * index update is persistent so we need to keep a reference around for
...@@ -208,15 +198,16 @@ static void bch2_btree_node_free_index(struct btree_update *as, struct btree *b, ...@@ -208,15 +198,16 @@ static void bch2_btree_node_free_index(struct btree_update *as, struct btree *b,
* bch2_mark_key() compares the current gc pos to the pos we're * bch2_mark_key() compares the current gc pos to the pos we're
* moving this reference from, hence one comparison here: * moving this reference from, hence one comparison here:
*/ */
if (gc_pos_cmp(c->gc_pos, gc_phase(GC_PHASE_PENDING_DELETE)) < 0) { if (gc_pos_cmp(c->gc_pos, b
struct bch_fs_usage tmp = { 0 }; ? gc_pos_btree_node(b)
: gc_pos_btree_root(as->btree_id)) >= 0 &&
gc_pos_cmp(c->gc_pos, gc_phase(GC_PHASE_PENDING_DELETE)) < 0) {
struct gc_pos pos = { 0 };
bch2_mark_key(c, BKEY_TYPE_BTREE, bch2_mark_key(c, BKEY_TYPE_BTREE,
bkey_i_to_s_c(&d->key), bkey_i_to_s_c(&d->key),
false, 0, b false, 0, pos,
? gc_pos_btree_node(b) NULL, 0, BCH_BUCKET_MARK_GC);
: gc_pos_btree_root(as->btree_id),
&tmp, 0, 0);
/* /*
* Don't apply tmp - pending deletes aren't tracked in * Don't apply tmp - pending deletes aren't tracked in
* bch_alloc_stats: * bch_alloc_stats:
...@@ -287,19 +278,13 @@ void bch2_btree_node_free_inmem(struct bch_fs *c, struct btree *b, ...@@ -287,19 +278,13 @@ void bch2_btree_node_free_inmem(struct bch_fs *c, struct btree *b,
static void bch2_btree_node_free_ondisk(struct bch_fs *c, static void bch2_btree_node_free_ondisk(struct bch_fs *c,
struct pending_btree_node_free *pending) struct pending_btree_node_free *pending)
{ {
struct bch_fs_usage stats = { 0 };
BUG_ON(!pending->index_update_done); BUG_ON(!pending->index_update_done);
bch2_mark_key(c, BKEY_TYPE_BTREE, bch2_mark_key(c, BKEY_TYPE_BTREE,
bkey_i_to_s_c(&pending->key), bkey_i_to_s_c(&pending->key),
false, 0, false, 0,
gc_phase(GC_PHASE_PENDING_DELETE), gc_phase(GC_PHASE_PENDING_DELETE),
&stats, 0, 0); NULL, 0, 0);
/*
* Don't apply stats - pending deletes aren't tracked in
* bch_alloc_stats:
*/
} }
static struct btree *__bch2_btree_node_alloc(struct bch_fs *c, static struct btree *__bch2_btree_node_alloc(struct bch_fs *c,
...@@ -1939,6 +1924,25 @@ static void __bch2_btree_node_update_key(struct bch_fs *c, ...@@ -1939,6 +1924,25 @@ static void __bch2_btree_node_update_key(struct bch_fs *c,
btree_interior_update_add_node_reference(as, b); btree_interior_update_add_node_reference(as, b);
/*
* XXX: the rest of the update path treats this like we're actually
* inserting a new node and deleting the existing node, so the
* reservation needs to include enough space for @b
*
* that is actually sketch as fuck though and I am surprised the code
* seems to work like that, definitely need to go back and rework it
* into something saner.
*
* (I think @b is just getting double counted until the btree update
* finishes and "deletes" @b on disk)
*/
ret = bch2_disk_reservation_add(c, &as->reserve->disk_res,
c->opts.btree_node_size *
bch2_extent_nr_ptrs(extent_i_to_s_c(new_key)),
BCH_DISK_RESERVATION_NOFAIL|
BCH_DISK_RESERVATION_GC_LOCK_HELD);
BUG_ON(ret);
parent = btree_node_parent(iter, b); parent = btree_node_parent(iter, b);
if (parent) { if (parent) {
if (new_hash) { if (new_hash) {
......
...@@ -85,8 +85,7 @@ static inline u64 __bch2_fs_sectors_used(struct bch_fs *, struct bch_fs_usage); ...@@ -85,8 +85,7 @@ static inline u64 __bch2_fs_sectors_used(struct bch_fs *, struct bch_fs_usage);
static void bch2_fs_stats_verify(struct bch_fs *c) static void bch2_fs_stats_verify(struct bch_fs *c)
{ {
struct bch_fs_usage stats = struct bch_fs_usage stats =_bch2_fs_usage_read(c);
__bch2_fs_usage_read(c);
unsigned i, j; unsigned i, j;
for (i = 0; i < ARRAY_SIZE(stats.replicas); i++) { for (i = 0; i < ARRAY_SIZE(stats.replicas); i++) {
...@@ -209,43 +208,24 @@ do { \ ...@@ -209,43 +208,24 @@ do { \
_acc; \ _acc; \
}) })
#define bch2_usage_read_cached(_c, _cached, _uncached) \ struct bch_dev_usage __bch2_dev_usage_read(struct bch_dev *ca, bool gc)
({ \
typeof(_cached) _ret; \
unsigned _seq; \
\
do { \
_seq = read_seqcount_begin(&(_c)->gc_pos_lock); \
_ret = (_c)->gc_pos.phase == GC_PHASE_DONE \
? bch2_usage_read_raw(_uncached) \
: (_cached); \
} while (read_seqcount_retry(&(_c)->gc_pos_lock, _seq)); \
\
_ret; \
})
struct bch_dev_usage __bch2_dev_usage_read(struct bch_dev *ca)
{ {
return bch2_usage_read_raw(ca->usage_percpu); return bch2_usage_read_raw(ca->usage[gc]);
} }
struct bch_dev_usage bch2_dev_usage_read(struct bch_fs *c, struct bch_dev *ca) struct bch_dev_usage bch2_dev_usage_read(struct bch_fs *c, struct bch_dev *ca)
{ {
return bch2_usage_read_cached(c, ca->usage_cached, ca->usage_percpu); return bch2_usage_read_raw(ca->usage[0]);
} }
struct bch_fs_usage struct bch_fs_usage __bch2_fs_usage_read(struct bch_fs *c, bool gc)
__bch2_fs_usage_read(struct bch_fs *c)
{ {
return bch2_usage_read_raw(c->usage_percpu); return bch2_usage_read_raw(c->usage[gc]);
} }
struct bch_fs_usage struct bch_fs_usage bch2_fs_usage_read(struct bch_fs *c)
bch2_fs_usage_read(struct bch_fs *c)
{ {
return bch2_usage_read_cached(c, return bch2_usage_read_raw(c->usage[0]);
c->usage_cached,
c->usage_percpu);
} }
struct fs_usage_sum { struct fs_usage_sum {
...@@ -327,13 +307,11 @@ static inline enum bch_data_type bucket_type(struct bucket_mark m) ...@@ -327,13 +307,11 @@ static inline enum bch_data_type bucket_type(struct bucket_mark m)
: m.data_type; : m.data_type;
} }
static bool bucket_became_unavailable(struct bch_fs *c, static bool bucket_became_unavailable(struct bucket_mark old,
struct bucket_mark old,
struct bucket_mark new) struct bucket_mark new)
{ {
return is_available_bucket(old) && return is_available_bucket(old) &&
!is_available_bucket(new) && !is_available_bucket(new);
(!c || c->gc_pos.phase == GC_PHASE_DONE);
} }
void bch2_fs_usage_apply(struct bch_fs *c, void bch2_fs_usage_apply(struct bch_fs *c,
...@@ -364,11 +342,13 @@ void bch2_fs_usage_apply(struct bch_fs *c, ...@@ -364,11 +342,13 @@ void bch2_fs_usage_apply(struct bch_fs *c,
percpu_down_read(&c->usage_lock); percpu_down_read(&c->usage_lock);
preempt_disable(); preempt_disable();
/* online_reserved not subject to gc: */ /* online_reserved not subject to gc: */
this_cpu_add(c->usage_percpu->online_reserved, stats->online_reserved); this_cpu_add(c->usage[0]->online_reserved, stats->online_reserved);
stats->online_reserved = 0; stats->online_reserved = 0;
if (!gc_will_visit(c, gc_pos)) bch2_usage_add(this_cpu_ptr(c->usage[0]), stats);
bch2_usage_add(this_cpu_ptr(c->usage_percpu), stats);
if (gc_visited(c, gc_pos))
bch2_usage_add(this_cpu_ptr(c->usage[1]), stats);
bch2_fs_stats_verify(c); bch2_fs_stats_verify(c);
preempt_enable(); preempt_enable();
...@@ -378,8 +358,9 @@ void bch2_fs_usage_apply(struct bch_fs *c, ...@@ -378,8 +358,9 @@ void bch2_fs_usage_apply(struct bch_fs *c,
} }
static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca, static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
struct bch_fs_usage *stats, struct bch_fs_usage *fs_usage,
struct bucket_mark old, struct bucket_mark new) struct bucket_mark old, struct bucket_mark new,
bool gc)
{ {
struct bch_dev_usage *dev_usage; struct bch_dev_usage *dev_usage;
...@@ -391,14 +372,18 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca, ...@@ -391,14 +372,18 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
bch2_data_types[old.data_type], bch2_data_types[old.data_type],
bch2_data_types[new.data_type]); bch2_data_types[new.data_type]);
stats->buckets[bucket_type(old)] -= ca->mi.bucket_size;
stats->buckets[bucket_type(new)] += ca->mi.bucket_size;
preempt_disable(); preempt_disable();
dev_usage = this_cpu_ptr(ca->usage_percpu); dev_usage = this_cpu_ptr(ca->usage[gc]);
dev_usage->buckets[bucket_type(old)]--; if (bucket_type(old) != bucket_type(new)) {
dev_usage->buckets[bucket_type(new)]++; if (bucket_type(old)) {
fs_usage->buckets[bucket_type(old)] -= ca->mi.bucket_size;
dev_usage->buckets[bucket_type(old)]--;
} else {
fs_usage->buckets[bucket_type(new)] += ca->mi.bucket_size;
dev_usage->buckets[bucket_type(new)]++;
}
}
dev_usage->buckets_alloc += dev_usage->buckets_alloc +=
(int) new.owned_by_allocator - (int) old.owned_by_allocator; (int) new.owned_by_allocator - (int) old.owned_by_allocator;
...@@ -425,21 +410,18 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca, ...@@ -425,21 +410,18 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
({ \ ({ \
struct bucket_mark _old = bucket_cmpxchg(g, new, expr); \ struct bucket_mark _old = bucket_cmpxchg(g, new, expr); \
\ \
bch2_dev_usage_update(c, ca, stats, _old, new); \ bch2_dev_usage_update(c, ca, stats, _old, new, gc); \
_old; \ _old; \
}) })
void bch2_invalidate_bucket(struct bch_fs *c, struct bch_dev *ca, static void __bch2_invalidate_bucket(struct bch_fs *c, struct bch_dev *ca,
size_t b, struct bucket_mark *old) size_t b, struct bucket_mark *old,
bool gc)
{ {
struct bch_fs_usage *stats = this_cpu_ptr(c->usage_percpu); struct bch_fs_usage *stats = this_cpu_ptr(c->usage[gc]);
struct bucket *g; struct bucket *g = __bucket(ca, b, gc);
struct bucket_mark new; struct bucket_mark new;
percpu_rwsem_assert_held(&c->usage_lock);
g = bucket(ca, b);
*old = bucket_data_cmpxchg(c, ca, stats, g, new, ({ *old = bucket_data_cmpxchg(c, ca, stats, g, new, ({
BUG_ON(!is_available_bucket(new)); BUG_ON(!is_available_bucket(new));
...@@ -450,38 +432,49 @@ void bch2_invalidate_bucket(struct bch_fs *c, struct bch_dev *ca, ...@@ -450,38 +432,49 @@ void bch2_invalidate_bucket(struct bch_fs *c, struct bch_dev *ca,
new.gen++; new.gen++;
})); }));
/*
* This isn't actually correct yet, since fs usage is still
* uncompressed sectors:
*/
stats->replicas[0].data[BCH_DATA_CACHED] -= old->cached_sectors; stats->replicas[0].data[BCH_DATA_CACHED] -= old->cached_sectors;
}
void bch2_invalidate_bucket(struct bch_fs *c, struct bch_dev *ca,
size_t b, struct bucket_mark *old)
{
percpu_rwsem_assert_held(&c->usage_lock);
__bch2_invalidate_bucket(c, ca, b, old, false);
if (!old->owned_by_allocator && old->cached_sectors) if (!old->owned_by_allocator && old->cached_sectors)
trace_invalidate(ca, bucket_to_sector(ca, b), trace_invalidate(ca, bucket_to_sector(ca, b),
old->cached_sectors); old->cached_sectors);
} }
void bch2_mark_alloc_bucket(struct bch_fs *c, struct bch_dev *ca, static void __bch2_mark_alloc_bucket(struct bch_fs *c, struct bch_dev *ca,
size_t b, bool owned_by_allocator, size_t b, bool owned_by_allocator,
struct gc_pos pos, unsigned flags) bool gc)
{ {
struct bch_fs_usage *stats = this_cpu_ptr(c->usage_percpu); struct bch_fs_usage *stats = this_cpu_ptr(c->usage[gc]);
struct bucket *g; struct bucket *g = __bucket(ca, b, gc);
struct bucket_mark old, new; struct bucket_mark old, new;
percpu_rwsem_assert_held(&c->usage_lock);
g = bucket(ca, b);
if (!(flags & BCH_BUCKET_MARK_GC_LOCK_HELD) &&
gc_will_visit(c, pos))
return;
old = bucket_data_cmpxchg(c, ca, stats, g, new, ({ old = bucket_data_cmpxchg(c, ca, stats, g, new, ({
new.owned_by_allocator = owned_by_allocator; new.owned_by_allocator = owned_by_allocator;
})); }));
BUG_ON(!owned_by_allocator && !old.owned_by_allocator && BUG_ON(!gc &&
c->gc_pos.phase == GC_PHASE_DONE); !owned_by_allocator && !old.owned_by_allocator);
}
void bch2_mark_alloc_bucket(struct bch_fs *c, struct bch_dev *ca,
size_t b, bool owned_by_allocator,
struct gc_pos pos, unsigned flags)
{
percpu_rwsem_assert_held(&c->usage_lock);
if (!(flags & BCH_BUCKET_MARK_GC))
__bch2_mark_alloc_bucket(c, ca, b, owned_by_allocator, false);
if ((flags & BCH_BUCKET_MARK_GC) ||
gc_visited(c, pos))
__bch2_mark_alloc_bucket(c, ca, b, owned_by_allocator, true);
} }
#define checked_add(a, b) \ #define checked_add(a, b) \
...@@ -491,37 +484,49 @@ do { \ ...@@ -491,37 +484,49 @@ do { \
BUG_ON((a) != _res); \ BUG_ON((a) != _res); \
} while (0) } while (0)
static void __bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
size_t b, enum bch_data_type type,
unsigned sectors, bool gc)
{
struct bch_fs_usage *fs_usage = this_cpu_ptr(c->usage[gc]);
struct bucket *g = __bucket(ca, b, gc);
struct bucket_mark old, new;
BUG_ON(type != BCH_DATA_SB &&
type != BCH_DATA_JOURNAL);
old = bucket_data_cmpxchg(c, ca, fs_usage, g, new, ({
new.data_type = type;
checked_add(new.dirty_sectors, sectors);
}));
fs_usage->replicas[0].data[type] += sectors;
}
void bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca, void bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
size_t b, enum bch_data_type type, size_t b, enum bch_data_type type,
unsigned sectors, struct gc_pos pos, unsigned sectors, struct gc_pos pos,
unsigned flags) unsigned flags)
{ {
struct bch_fs_usage *stats;
struct bucket *g;
struct bucket_mark old, new;
BUG_ON(type != BCH_DATA_SB && BUG_ON(type != BCH_DATA_SB &&
type != BCH_DATA_JOURNAL); type != BCH_DATA_JOURNAL);
preempt_disable();
if (likely(c)) { if (likely(c)) {
percpu_rwsem_assert_held(&c->usage_lock); percpu_rwsem_assert_held(&c->usage_lock);
if (!(flags & BCH_BUCKET_MARK_GC_LOCK_HELD) && if (!(flags & BCH_BUCKET_MARK_GC))
gc_will_visit(c, pos)) __bch2_mark_metadata_bucket(c, ca, b, type, sectors,
return; false);
if ((flags & BCH_BUCKET_MARK_GC) ||
preempt_disable(); gc_visited(c, pos))
stats = this_cpu_ptr(c->usage_percpu); __bch2_mark_metadata_bucket(c, ca, b, type, sectors,
true);
g = bucket(ca, b);
old = bucket_data_cmpxchg(c, ca, stats, g, new, ({
new.data_type = type;
checked_add(new.dirty_sectors, sectors);
}));
stats->replicas[0].data[type] += sectors;
preempt_enable();
} else { } else {
struct bucket *g;
struct bucket_mark old, new;
rcu_read_lock(); rcu_read_lock();
g = bucket(ca, b); g = bucket(ca, b);
...@@ -533,8 +538,7 @@ void bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca, ...@@ -533,8 +538,7 @@ void bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
rcu_read_unlock(); rcu_read_unlock();
} }
BUG_ON(!(flags & BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE) && preempt_enable();
bucket_became_unavailable(c, old, new));
} }
static int __disk_sectors(struct bch_extent_crc_unpacked crc, unsigned sectors) static int __disk_sectors(struct bch_extent_crc_unpacked crc, unsigned sectors)
...@@ -579,23 +583,15 @@ static void bch2_mark_pointer(struct bch_fs *c, ...@@ -579,23 +583,15 @@ static void bch2_mark_pointer(struct bch_fs *c,
struct extent_ptr_decoded p, struct extent_ptr_decoded p,
s64 sectors, enum bch_data_type data_type, s64 sectors, enum bch_data_type data_type,
struct bch_fs_usage *fs_usage, struct bch_fs_usage *fs_usage,
u64 journal_seq, unsigned flags) u64 journal_seq, unsigned flags,
bool gc)
{ {
struct bucket_mark old, new; struct bucket_mark old, new;
struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev); struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
struct bucket *g = PTR_BUCKET(ca, &p.ptr); size_t b = PTR_BUCKET_NR(ca, &p.ptr);
struct bucket *g = __bucket(ca, b, gc);
u64 v; u64 v;
if (flags & BCH_BUCKET_MARK_GC_WILL_VISIT) {
if (journal_seq)
bucket_cmpxchg(g, new, ({
new.journal_seq_valid = 1;
new.journal_seq = journal_seq;
}));
return;
}
v = atomic64_read(&g->_mark.v); v = atomic64_read(&g->_mark.v);
do { do {
new.v.counter = old.v.counter = v; new.v.counter = old.v.counter = v;
...@@ -637,10 +633,9 @@ static void bch2_mark_pointer(struct bch_fs *c, ...@@ -637,10 +633,9 @@ static void bch2_mark_pointer(struct bch_fs *c,
old.v.counter, old.v.counter,
new.v.counter)) != old.v.counter); new.v.counter)) != old.v.counter);
bch2_dev_usage_update(c, ca, fs_usage, old, new); bch2_dev_usage_update(c, ca, fs_usage, old, new, gc);
BUG_ON(!(flags & BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE) && BUG_ON(!gc && bucket_became_unavailable(old, new));
bucket_became_unavailable(c, old, new));
} }
static void bch2_mark_stripe_ptr(struct bch_fs *c, static void bch2_mark_stripe_ptr(struct bch_fs *c,
...@@ -688,9 +683,9 @@ static void bch2_mark_stripe_ptr(struct bch_fs *c, ...@@ -688,9 +683,9 @@ static void bch2_mark_stripe_ptr(struct bch_fs *c,
static void bch2_mark_extent(struct bch_fs *c, struct bkey_s_c k, static void bch2_mark_extent(struct bch_fs *c, struct bkey_s_c k,
s64 sectors, enum bch_data_type data_type, s64 sectors, enum bch_data_type data_type,
struct gc_pos pos,
struct bch_fs_usage *stats, struct bch_fs_usage *stats,
u64 journal_seq, unsigned flags) u64 journal_seq, unsigned flags,
bool gc)
{ {
BUG_ON(!sectors); BUG_ON(!sectors);
...@@ -712,7 +707,7 @@ static void bch2_mark_extent(struct bch_fs *c, struct bkey_s_c k, ...@@ -712,7 +707,7 @@ static void bch2_mark_extent(struct bch_fs *c, struct bkey_s_c k,
s64 adjusted_disk_sectors = disk_sectors; s64 adjusted_disk_sectors = disk_sectors;
bch2_mark_pointer(c, e, p, disk_sectors, data_type, bch2_mark_pointer(c, e, p, disk_sectors, data_type,
stats, journal_seq, flags); stats, journal_seq, flags, gc);
if (!p.ptr.cached) if (!p.ptr.cached)
for (i = 0; i < p.ec_nr; i++) for (i = 0; i < p.ec_nr; i++)
...@@ -758,21 +753,20 @@ static void bucket_set_stripe(struct bch_fs *c, ...@@ -758,21 +753,20 @@ static void bucket_set_stripe(struct bch_fs *c,
const struct bch_stripe *v, const struct bch_stripe *v,
bool enabled, bool enabled,
struct bch_fs_usage *fs_usage, struct bch_fs_usage *fs_usage,
u64 journal_seq) u64 journal_seq,
bool gc)
{ {
unsigned i; unsigned i;
for (i = 0; i < v->nr_blocks; i++) { for (i = 0; i < v->nr_blocks; i++) {
const struct bch_extent_ptr *ptr = v->ptrs + i; const struct bch_extent_ptr *ptr = v->ptrs + i;
struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
struct bucket *g; size_t b = PTR_BUCKET_NR(ca, ptr);
struct bucket *g = __bucket(ca, b, gc);
struct bucket_mark new, old; struct bucket_mark new, old;
BUG_ON(ptr_stale(ca, ptr)); BUG_ON(ptr_stale(ca, ptr));
rcu_read_lock();
g = PTR_BUCKET(ca, ptr);
old = bucket_cmpxchg(g, new, ({ old = bucket_cmpxchg(g, new, ({
new.stripe = enabled; new.stripe = enabled;
if (journal_seq) { if (journal_seq) {
...@@ -780,18 +774,18 @@ static void bucket_set_stripe(struct bch_fs *c, ...@@ -780,18 +774,18 @@ static void bucket_set_stripe(struct bch_fs *c,
new.journal_seq = journal_seq; new.journal_seq = journal_seq;
} }
})); }));
rcu_read_unlock();
BUG_ON(old.stripe == enabled); BUG_ON(old.stripe == enabled);
bch2_dev_usage_update(c, ca, fs_usage, old, new); bch2_dev_usage_update(c, ca, fs_usage, old, new, gc);
} }
} }
static void bch2_mark_stripe(struct bch_fs *c, struct bkey_s_c k, static void bch2_mark_stripe(struct bch_fs *c, struct bkey_s_c k,
bool inserting, struct gc_pos pos, bool inserting,
struct bch_fs_usage *fs_usage, struct bch_fs_usage *fs_usage,
u64 journal_seq, unsigned flags) u64 journal_seq, unsigned flags,
bool gc)
{ {
switch (k.k->type) { switch (k.k->type) {
case BCH_STRIPE: { case BCH_STRIPE: {
...@@ -820,74 +814,64 @@ static void bch2_mark_stripe(struct bch_fs *c, struct bkey_s_c k, ...@@ -820,74 +814,64 @@ static void bch2_mark_stripe(struct bch_fs *c, struct bkey_s_c k,
else else
bch2_stripes_heap_del(c, m, idx); bch2_stripes_heap_del(c, m, idx);
bucket_set_stripe(c, s.v, inserting, fs_usage, 0); bucket_set_stripe(c, s.v, inserting, fs_usage, 0, gc);
break; break;
} }
} }
} }
void bch2_mark_key(struct bch_fs *c, static void __bch2_mark_key(struct bch_fs *c,
enum bkey_type type, struct bkey_s_c k, enum bkey_type type, struct bkey_s_c k,
bool inserting, s64 sectors, bool inserting, s64 sectors,
struct gc_pos pos, struct bch_fs_usage *stats,
struct bch_fs_usage *stats, u64 journal_seq, unsigned flags,
u64 journal_seq, unsigned flags) bool gc)
{ {
/*
* synchronization w.r.t. GC:
*
* Normally, bucket sector counts/marks are updated on the fly, as
* references are added/removed from the btree, the lists of buckets the
* allocator owns, other metadata buckets, etc.
*
* When GC is in progress and going to mark this reference, we do _not_
* mark this reference here, to avoid double counting - GC will count it
* when it gets to it.
*
* To know whether we should mark a given reference (GC either isn't
* running, or has already marked references at this position) we
* construct a total order for everything GC walks. Then, we can simply
* compare the position of the reference we're marking - @pos - with
* GC's current position. If GC is going to mark this reference, GC's
* current position will be less than @pos; if GC's current position is
* greater than @pos GC has either already walked this position, or
* isn't running.
*
* To avoid racing with GC's position changing, we have to deal with
* - GC's position being set to GC_POS_MIN when GC starts:
* usage_lock guards against this
* - GC's position overtaking @pos: we guard against this with
* whatever lock protects the data structure the reference lives in
* (e.g. the btree node lock, or the relevant allocator lock).
*/
percpu_down_read(&c->usage_lock);
if (!(flags & BCH_BUCKET_MARK_GC_LOCK_HELD) &&
gc_will_visit(c, pos))
flags |= BCH_BUCKET_MARK_GC_WILL_VISIT;
if (!stats)
stats = this_cpu_ptr(c->usage_percpu);
switch (type) { switch (type) {
case BKEY_TYPE_BTREE: case BKEY_TYPE_BTREE:
bch2_mark_extent(c, k, inserting bch2_mark_extent(c, k, inserting
? c->opts.btree_node_size ? c->opts.btree_node_size
: -c->opts.btree_node_size, : -c->opts.btree_node_size,
BCH_DATA_BTREE, BCH_DATA_BTREE,
pos, stats, journal_seq, flags); stats, journal_seq, flags, gc);
break; break;
case BKEY_TYPE_EXTENTS: case BKEY_TYPE_EXTENTS:
bch2_mark_extent(c, k, sectors, BCH_DATA_USER, bch2_mark_extent(c, k, sectors, BCH_DATA_USER,
pos, stats, journal_seq, flags); stats, journal_seq, flags, gc);
break; break;
case BKEY_TYPE_EC: case BKEY_TYPE_EC:
bch2_mark_stripe(c, k, inserting, bch2_mark_stripe(c, k, inserting,
pos, stats, journal_seq, flags); stats, journal_seq, flags, gc);
break; break;
default: default:
break; break;
} }
}
void bch2_mark_key(struct bch_fs *c,
enum bkey_type type, struct bkey_s_c k,
bool inserting, s64 sectors,
struct gc_pos pos,
struct bch_fs_usage *stats,
u64 journal_seq, unsigned flags)
{
percpu_down_read(&c->usage_lock);
if (!(flags & BCH_BUCKET_MARK_GC)) {
if (!stats)
stats = this_cpu_ptr(c->usage[0]);
__bch2_mark_key(c, type, k, inserting, sectors,
stats, journal_seq, flags, false);
}
if ((flags & BCH_BUCKET_MARK_GC) ||
gc_visited(c, pos)) {
__bch2_mark_key(c, type, k, inserting, sectors,
this_cpu_ptr(c->usage[1]),
journal_seq, flags, true);
}
percpu_up_read(&c->usage_lock); percpu_up_read(&c->usage_lock);
} }
...@@ -963,28 +947,20 @@ void bch2_mark_update(struct btree_insert *trans, ...@@ -963,28 +947,20 @@ void bch2_mark_update(struct btree_insert *trans,
/* Disk reservations: */ /* Disk reservations: */
static u64 __recalc_sectors_available(struct bch_fs *c) static u64 bch2_recalc_sectors_available(struct bch_fs *c)
{ {
int cpu; int cpu;
for_each_possible_cpu(cpu) for_each_possible_cpu(cpu)
per_cpu_ptr(c->usage_percpu, cpu)->available_cache = 0; per_cpu_ptr(c->usage[0], cpu)->available_cache = 0;
return avail_factor(bch2_fs_sectors_free(c, bch2_fs_usage_read(c))); return avail_factor(bch2_fs_sectors_free(c, bch2_fs_usage_read(c)));
} }
/* Used by gc when it's starting: */
void bch2_recalc_sectors_available(struct bch_fs *c)
{
percpu_down_write(&c->usage_lock);
atomic64_set(&c->sectors_available, __recalc_sectors_available(c));
percpu_up_write(&c->usage_lock);
}
void __bch2_disk_reservation_put(struct bch_fs *c, struct disk_reservation *res) void __bch2_disk_reservation_put(struct bch_fs *c, struct disk_reservation *res)
{ {
percpu_down_read(&c->usage_lock); percpu_down_read(&c->usage_lock);
this_cpu_sub(c->usage_percpu->online_reserved, this_cpu_sub(c->usage[0]->online_reserved,
res->sectors); res->sectors);
bch2_fs_stats_verify(c); bch2_fs_stats_verify(c);
...@@ -1005,7 +981,7 @@ int bch2_disk_reservation_add(struct bch_fs *c, struct disk_reservation *res, ...@@ -1005,7 +981,7 @@ int bch2_disk_reservation_add(struct bch_fs *c, struct disk_reservation *res,
percpu_down_read(&c->usage_lock); percpu_down_read(&c->usage_lock);
preempt_disable(); preempt_disable();
stats = this_cpu_ptr(c->usage_percpu); stats = this_cpu_ptr(c->usage[0]);
if (sectors <= stats->available_cache) if (sectors <= stats->available_cache)
goto out; goto out;
...@@ -1055,7 +1031,7 @@ int bch2_disk_reservation_add(struct bch_fs *c, struct disk_reservation *res, ...@@ -1055,7 +1031,7 @@ int bch2_disk_reservation_add(struct bch_fs *c, struct disk_reservation *res,
} }
percpu_down_write(&c->usage_lock); percpu_down_write(&c->usage_lock);
sectors_available = __recalc_sectors_available(c); sectors_available = bch2_recalc_sectors_available(c);
if (sectors <= sectors_available || if (sectors <= sectors_available ||
(flags & BCH_DISK_RESERVATION_NOFAIL)) { (flags & BCH_DISK_RESERVATION_NOFAIL)) {
...@@ -1110,7 +1086,7 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) ...@@ -1110,7 +1086,7 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
size_t copygc_reserve = max_t(size_t, 2, nbuckets >> 7); size_t copygc_reserve = max_t(size_t, 2, nbuckets >> 7);
size_t free_inc_nr = max(max_t(size_t, 1, nbuckets >> 12), size_t free_inc_nr = max(max_t(size_t, 1, nbuckets >> 12),
btree_reserve); btree_reserve);
bool resize = ca->buckets != NULL, bool resize = ca->buckets[0] != NULL,
start_copygc = ca->copygc_thread != NULL; start_copygc = ca->copygc_thread != NULL;
int ret = -ENOMEM; int ret = -ENOMEM;
unsigned i; unsigned i;
...@@ -1170,7 +1146,7 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) ...@@ -1170,7 +1146,7 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
BITS_TO_LONGS(n) * sizeof(unsigned long)); BITS_TO_LONGS(n) * sizeof(unsigned long));
} }
rcu_assign_pointer(ca->buckets, buckets); rcu_assign_pointer(ca->buckets[0], buckets);
buckets = old_buckets; buckets = old_buckets;
swap(ca->oldest_gens, oldest_gens); swap(ca->oldest_gens, oldest_gens);
...@@ -1239,16 +1215,16 @@ void bch2_dev_buckets_free(struct bch_dev *ca) ...@@ -1239,16 +1215,16 @@ void bch2_dev_buckets_free(struct bch_dev *ca)
kvpfree(ca->buckets_dirty, kvpfree(ca->buckets_dirty,
BITS_TO_LONGS(ca->mi.nbuckets) * sizeof(unsigned long)); BITS_TO_LONGS(ca->mi.nbuckets) * sizeof(unsigned long));
kvpfree(ca->oldest_gens, ca->mi.nbuckets * sizeof(u8)); kvpfree(ca->oldest_gens, ca->mi.nbuckets * sizeof(u8));
kvpfree(rcu_dereference_protected(ca->buckets, 1), kvpfree(rcu_dereference_protected(ca->buckets[0], 1),
sizeof(struct bucket_array) + sizeof(struct bucket_array) +
ca->mi.nbuckets * sizeof(struct bucket)); ca->mi.nbuckets * sizeof(struct bucket));
free_percpu(ca->usage_percpu); free_percpu(ca->usage[0]);
} }
int bch2_dev_buckets_alloc(struct bch_fs *c, struct bch_dev *ca) int bch2_dev_buckets_alloc(struct bch_fs *c, struct bch_dev *ca)
{ {
if (!(ca->usage_percpu = alloc_percpu(struct bch_dev_usage))) if (!(ca->usage[0] = alloc_percpu(struct bch_dev_usage)))
return -ENOMEM; return -ENOMEM;
return bch2_dev_buckets_resize(c, ca, ca->mi.nbuckets);; return bch2_dev_buckets_resize(c, ca, ca->mi.nbuckets);;
......
...@@ -29,23 +29,34 @@ ...@@ -29,23 +29,34 @@
_old; \ _old; \
}) })
static inline struct bucket_array *bucket_array(struct bch_dev *ca) static inline struct bucket_array *__bucket_array(struct bch_dev *ca,
bool gc)
{ {
return rcu_dereference_check(ca->buckets, return rcu_dereference_check(ca->buckets[gc],
!ca->fs || !ca->fs ||
percpu_rwsem_is_held(&ca->fs->usage_lock) || percpu_rwsem_is_held(&ca->fs->usage_lock) ||
lockdep_is_held(&ca->fs->gc_lock) || lockdep_is_held(&ca->fs->gc_lock) ||
lockdep_is_held(&ca->bucket_lock)); lockdep_is_held(&ca->bucket_lock));
} }
static inline struct bucket *bucket(struct bch_dev *ca, size_t b) static inline struct bucket_array *bucket_array(struct bch_dev *ca)
{
return __bucket_array(ca, false);
}
static inline struct bucket *__bucket(struct bch_dev *ca, size_t b, bool gc)
{ {
struct bucket_array *buckets = bucket_array(ca); struct bucket_array *buckets = __bucket_array(ca, gc);
BUG_ON(b < buckets->first_bucket || b >= buckets->nbuckets); BUG_ON(b < buckets->first_bucket || b >= buckets->nbuckets);
return buckets->b + b; return buckets->b + b;
} }
static inline struct bucket *bucket(struct bch_dev *ca, size_t b)
{
return __bucket(ca, b, false);
}
static inline void bucket_io_clock_reset(struct bch_fs *c, struct bch_dev *ca, static inline void bucket_io_clock_reset(struct bch_fs *c, struct bch_dev *ca,
size_t b, int rw) size_t b, int rw)
{ {
...@@ -129,7 +140,7 @@ static inline bool bucket_unused(struct bucket_mark mark) ...@@ -129,7 +140,7 @@ static inline bool bucket_unused(struct bucket_mark mark)
/* Device usage: */ /* Device usage: */
struct bch_dev_usage __bch2_dev_usage_read(struct bch_dev *); struct bch_dev_usage __bch2_dev_usage_read(struct bch_dev *, bool);
struct bch_dev_usage bch2_dev_usage_read(struct bch_fs *, struct bch_dev *); struct bch_dev_usage bch2_dev_usage_read(struct bch_fs *, struct bch_dev *);
static inline u64 __dev_buckets_available(struct bch_dev *ca, static inline u64 __dev_buckets_available(struct bch_dev *ca,
...@@ -168,7 +179,7 @@ static inline u64 dev_buckets_free(struct bch_fs *c, struct bch_dev *ca) ...@@ -168,7 +179,7 @@ static inline u64 dev_buckets_free(struct bch_fs *c, struct bch_dev *ca)
/* Filesystem usage: */ /* Filesystem usage: */
struct bch_fs_usage __bch2_fs_usage_read(struct bch_fs *); struct bch_fs_usage __bch2_fs_usage_read(struct bch_fs *, bool);
struct bch_fs_usage bch2_fs_usage_read(struct bch_fs *); struct bch_fs_usage bch2_fs_usage_read(struct bch_fs *);
void bch2_fs_usage_apply(struct bch_fs *, struct bch_fs_usage *, void bch2_fs_usage_apply(struct bch_fs *, struct bch_fs_usage *,
struct disk_reservation *, struct gc_pos); struct disk_reservation *, struct gc_pos);
...@@ -207,17 +218,13 @@ void bch2_mark_metadata_bucket(struct bch_fs *, struct bch_dev *, ...@@ -207,17 +218,13 @@ void bch2_mark_metadata_bucket(struct bch_fs *, struct bch_dev *,
struct gc_pos, unsigned); struct gc_pos, unsigned);
#define BCH_BUCKET_MARK_NOATOMIC (1 << 0) #define BCH_BUCKET_MARK_NOATOMIC (1 << 0)
#define BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE (1 << 1) #define BCH_BUCKET_MARK_GC (1 << 1)
#define BCH_BUCKET_MARK_GC_WILL_VISIT (1 << 2)
#define BCH_BUCKET_MARK_GC_LOCK_HELD (1 << 3)
void bch2_mark_key(struct bch_fs *, enum bkey_type, struct bkey_s_c, void bch2_mark_key(struct bch_fs *, enum bkey_type, struct bkey_s_c,
bool, s64, struct gc_pos, bool, s64, struct gc_pos,
struct bch_fs_usage *, u64, unsigned); struct bch_fs_usage *, u64, unsigned);
void bch2_mark_update(struct btree_insert *, struct btree_insert_entry *); void bch2_mark_update(struct btree_insert *, struct btree_insert_entry *);
void bch2_recalc_sectors_available(struct bch_fs *);
void __bch2_disk_reservation_put(struct bch_fs *, struct disk_reservation *); void __bch2_disk_reservation_put(struct bch_fs *, struct disk_reservation *);
static inline void bch2_disk_reservation_put(struct bch_fs *c, static inline void bch2_disk_reservation_put(struct bch_fs *c,
......
...@@ -64,8 +64,6 @@ struct bch_dev_usage { ...@@ -64,8 +64,6 @@ struct bch_dev_usage {
struct bch_fs_usage { struct bch_fs_usage {
/* all fields are in units of 512 byte sectors: */ /* all fields are in units of 512 byte sectors: */
u64 online_reserved;
u64 available_cache;
struct { struct {
u64 data[BCH_DATA_NR]; u64 data[BCH_DATA_NR];
...@@ -74,6 +72,10 @@ struct bch_fs_usage { ...@@ -74,6 +72,10 @@ struct bch_fs_usage {
} replicas[BCH_REPLICAS_MAX]; } replicas[BCH_REPLICAS_MAX];
u64 buckets[BCH_DATA_NR]; u64 buckets[BCH_DATA_NR];
/* fields starting here aren't touched by gc: */
u64 online_reserved;
u64 available_cache;
}; };
/* /*
......
...@@ -782,9 +782,7 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr, ...@@ -782,9 +782,7 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
bch2_mark_metadata_bucket(c, ca, bucket, BCH_DATA_JOURNAL, bch2_mark_metadata_bucket(c, ca, bucket, BCH_DATA_JOURNAL,
ca->mi.bucket_size, ca->mi.bucket_size,
gc_phase(GC_PHASE_SB), gc_phase(GC_PHASE_SB),
new_fs 0);
? BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE
: 0);
if (c) { if (c) {
spin_unlock(&c->journal.lock); spin_unlock(&c->journal.lock);
......
...@@ -374,7 +374,7 @@ static void bch2_fs_free(struct bch_fs *c) ...@@ -374,7 +374,7 @@ static void bch2_fs_free(struct bch_fs *c)
bch2_io_clock_exit(&c->io_clock[READ]); bch2_io_clock_exit(&c->io_clock[READ]);
bch2_fs_compress_exit(c); bch2_fs_compress_exit(c);
percpu_free_rwsem(&c->usage_lock); percpu_free_rwsem(&c->usage_lock);
free_percpu(c->usage_percpu); free_percpu(c->usage[0]);
mempool_exit(&c->btree_iters_pool); mempool_exit(&c->btree_iters_pool);
mempool_exit(&c->btree_bounce_pool); mempool_exit(&c->btree_bounce_pool);
bioset_exit(&c->btree_bio); bioset_exit(&c->btree_bio);
...@@ -606,7 +606,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) ...@@ -606,7 +606,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
max(offsetof(struct btree_read_bio, bio), max(offsetof(struct btree_read_bio, bio),
offsetof(struct btree_write_bio, wbio.bio)), offsetof(struct btree_write_bio, wbio.bio)),
BIOSET_NEED_BVECS) || BIOSET_NEED_BVECS) ||
!(c->usage_percpu = alloc_percpu(struct bch_fs_usage)) || !(c->usage[0] = alloc_percpu(struct bch_fs_usage)) ||
percpu_init_rwsem(&c->usage_lock) || percpu_init_rwsem(&c->usage_lock) ||
mempool_init_kvpmalloc_pool(&c->btree_bounce_pool, 1, mempool_init_kvpmalloc_pool(&c->btree_bounce_pool, 1,
btree_bytes(c)) || btree_bytes(c)) ||
...@@ -1028,8 +1028,7 @@ static int bch2_dev_attach_bdev(struct bch_fs *c, struct bch_sb_handle *sb) ...@@ -1028,8 +1028,7 @@ static int bch2_dev_attach_bdev(struct bch_fs *c, struct bch_sb_handle *sb)
return ret; return ret;
mutex_lock(&c->sb_lock); mutex_lock(&c->sb_lock);
bch2_mark_dev_superblock(ca->fs, ca, bch2_mark_dev_superblock(ca->fs, ca, 0);
BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE);
mutex_unlock(&c->sb_lock); mutex_unlock(&c->sb_lock);
bch2_dev_sysfs_online(c, ca); bch2_dev_sysfs_online(c, ca);
...@@ -1314,7 +1313,7 @@ static void dev_usage_clear(struct bch_dev *ca) ...@@ -1314,7 +1313,7 @@ static void dev_usage_clear(struct bch_dev *ca)
for_each_possible_cpu(cpu) { for_each_possible_cpu(cpu) {
struct bch_dev_usage *p = struct bch_dev_usage *p =
per_cpu_ptr(ca->usage_percpu, cpu); per_cpu_ptr(ca->usage[0], cpu);
memset(p, 0, sizeof(*p)); memset(p, 0, sizeof(*p));
} }
...@@ -1375,8 +1374,7 @@ int bch2_dev_add(struct bch_fs *c, const char *path) ...@@ -1375,8 +1374,7 @@ int bch2_dev_add(struct bch_fs *c, const char *path)
* allocate the journal, reset all the marks, then remark after we * allocate the journal, reset all the marks, then remark after we
* attach... * attach...
*/ */
bch2_mark_dev_superblock(ca->fs, ca, bch2_mark_dev_superblock(ca->fs, ca, 0);
BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE);
err = "journal alloc failed"; err = "journal alloc failed";
ret = bch2_dev_journal_alloc(ca); ret = bch2_dev_journal_alloc(ca);
...@@ -1435,8 +1433,7 @@ int bch2_dev_add(struct bch_fs *c, const char *path) ...@@ -1435,8 +1433,7 @@ int bch2_dev_add(struct bch_fs *c, const char *path)
ca->disk_sb.sb->dev_idx = dev_idx; ca->disk_sb.sb->dev_idx = dev_idx;
bch2_dev_attach(c, ca, dev_idx); bch2_dev_attach(c, ca, dev_idx);
bch2_mark_dev_superblock(c, ca, bch2_mark_dev_superblock(c, ca, 0);
BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE);
bch2_write_super(c); bch2_write_super(c);
mutex_unlock(&c->sb_lock); mutex_unlock(&c->sb_lock);
......
...@@ -478,7 +478,7 @@ STORE(__bch2_fs) ...@@ -478,7 +478,7 @@ STORE(__bch2_fs)
bch2_coalesce(c); bch2_coalesce(c);
if (attr == &sysfs_trigger_gc) if (attr == &sysfs_trigger_gc)
bch2_gc(c); bch2_gc(c, NULL, false);
if (attr == &sysfs_prune_cache) { if (attr == &sysfs_prune_cache) {
struct shrink_control sc; struct shrink_control sc;
......
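
Taken together (this is a reading of the diff above, not additional bcachefs code): outside of gc, marking goes to set 0 only; while gc runs, updates at positions gc has already walked are also applied to set 1 via gc_visited(), and bch2_gc_done() afterwards reports and repairs any disagreement between the two sets. A hedged sketch of that gating, with invented names and an integer standing in for struct gc_pos:

/* Sketch only: duplicate an update into the gc set once gc has passed 'pos'. */
#include <stdbool.h>

struct fs_sketch {
	long	usage[2];	/* [0] = live counters, [1] = gc counters */
	int	gc_pos;		/* advances monotonically while gc runs   */
	bool	gc_running;
};

static bool sketch_gc_visited(const struct fs_sketch *c, int pos)
{
	return c->gc_running && pos <= c->gc_pos;
}

/* gc_owned stands in for the BCH_BUCKET_MARK_GC flag: the caller is gc itself */
static void sketch_mark(struct fs_sketch *c, int pos, long delta, bool gc_owned)
{
	if (!gc_owned)
		c->usage[0] += delta;		/* live set, always            */

	if (gc_owned || sketch_gc_visited(c, pos))
		c->usage[1] += delta;		/* keep gc's copy coherent too */
}
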