Commit 9ca53b55 authored by Kent Overstreet, committed by Kent Overstreet

bcachefs: gc now operates on second set of bucket marks

This means we can now use gc to verify the allocation information -
important for testing persistent alloc info

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent e6473691
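
The core idea, sketched below in a hedged, self-contained form (the struct and function names here are illustrative stand-ins, not the bcachefs source): each device keeps two copies of its bucket accounting, normal operation updates copy 0, gc rebuilds copy 1 from the btree, and the two are compared afterwards to catch accounting drift.

/* Minimal sketch of double-buffered bucket marks; all names are
 * illustrative, not the actual bcachefs types. */
#include <stdio.h>
#include <string.h>

#define NBUCKETS 8

struct bucket_mark { unsigned gen; unsigned dirty_sectors; };

struct dev {
	/* [0] = runtime marks, [1] = marks rebuilt by gc */
	struct bucket_mark marks[2][NBUCKETS];
};

/* After gc has rebuilt marks[1] from the index, verify marks[0]: */
static int gc_verify_marks(struct dev *d)
{
	int errors = 0;

	for (unsigned i = 0; i < NBUCKETS; i++)
		if (memcmp(&d->marks[0][i], &d->marks[1][i],
			   sizeof(struct bucket_mark))) {
			printf("bucket %u: runtime marks != gc marks\n", i);
			errors++;
		}
	return errors;
}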
@@ -930,12 +930,6 @@ static int bch2_allocator_thread(void *arg)
 		pr_debug("free_inc now empty");

 		do {
-			if (test_bit(BCH_FS_GC_FAILURE, &c->flags)) {
-				up_read(&c->gc_lock);
-				bch_err(ca, "gc failure");
-				goto stop;
-			}
-
 			/*
 			 * Find some buckets that we can invalidate, either
 			 * they're completely unused, or only contain clean data
@@ -1293,9 +1287,6 @@ static int __bch2_fs_allocator_start(struct bch_fs *c)
 	bool invalidating_data = false;
 	int ret = 0;

-	if (test_bit(BCH_FS_GC_FAILURE, &c->flags))
-		return -1;
-
 	if (test_alloc_startup(c)) {
 		invalidating_data = true;
 		goto not_enough;
@@ -1321,9 +1312,7 @@ static int __bch2_fs_allocator_start(struct bch_fs *c)
 				continue;

 			bch2_mark_alloc_bucket(c, ca, bu, true,
-					       gc_pos_alloc(c, NULL),
-					       BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE|
-					       BCH_BUCKET_MARK_GC_LOCK_HELD);
+					       gc_pos_alloc(c, NULL), 0);

 			fifo_push(&ca->free_inc, bu);
...
@@ -347,7 +347,6 @@ enum gc_phase {
 	GC_PHASE_PENDING_DELETE,
 	GC_PHASE_ALLOC,
-	GC_PHASE_DONE
 };

 struct gc_pos {
@@ -392,15 +391,14 @@ struct bch_dev {
 	 * gc_lock, for device resize - holding any is sufficient for access:
 	 * Or rcu_read_lock(), but only for ptr_stale():
 	 */
-	struct bucket_array __rcu *buckets;
+	struct bucket_array __rcu *buckets[2];
 	unsigned long		*buckets_dirty;
 	unsigned long		*buckets_written;
 	/* most out of date gen in the btree */
 	u8			*oldest_gens;
 	struct rw_semaphore	bucket_lock;

-	struct bch_dev_usage __percpu *usage_percpu;
-	struct bch_dev_usage	usage_cached;
+	struct bch_dev_usage __percpu *usage[2];

 	/* Allocator: */
 	struct task_struct __rcu *alloc_thread;
@@ -478,7 +476,6 @@ enum {
 	/* errors: */
 	BCH_FS_ERROR,
-	BCH_FS_GC_FAILURE,
 	/* misc: */
 	BCH_FS_BDEV_MOUNTED,
@@ -614,8 +611,8 @@ struct bch_fs {
 	atomic64_t		sectors_available;

-	struct bch_fs_usage __percpu *usage_percpu;
-	struct bch_fs_usage	usage_cached;
+	struct bch_fs_usage __percpu *usage[2];

 	struct percpu_rw_semaphore usage_lock;

 	struct closure_waitlist	freelist_wait;
@@ -656,9 +653,6 @@ struct bch_fs {
 	 *
 	 * gc_cur_phase is a superset of btree_ids (BTREE_ID_EXTENTS etc.)
 	 *
-	 * gc_cur_phase == GC_PHASE_DONE indicates that gc is finished/not
-	 * currently running, and gc marks are currently valid
-	 *
 	 * Protected by gc_pos_lock. Only written to by GC thread, so GC thread
 	 * can read without a lock.
 	 */
...
[diff collapsed]
@@ -7,7 +7,7 @@
 enum bkey_type;

 void bch2_coalesce(struct bch_fs *);
-void bch2_gc(struct bch_fs *);
+int bch2_gc(struct bch_fs *, struct list_head *, bool);
 void bch2_gc_thread_stop(struct bch_fs *);
 int bch2_gc_thread_start(struct bch_fs *);
 int bch2_initial_gc(struct bch_fs *, struct list_head *);
@@ -105,14 +105,14 @@ static inline struct gc_pos gc_pos_alloc(struct bch_fs *c, struct open_bucket *o
 	};
 }

-static inline bool gc_will_visit(struct bch_fs *c, struct gc_pos pos)
+static inline bool gc_visited(struct bch_fs *c, struct gc_pos pos)
 {
 	unsigned seq;
 	bool ret;

 	do {
 		seq = read_seqcount_begin(&c->gc_pos_lock);
-		ret = gc_pos_cmp(c->gc_pos, pos) < 0;
+		ret = gc_pos_cmp(pos, c->gc_pos) <= 0;
 	} while (read_seqcount_retry(&c->gc_pos_lock, seq));

 	return ret;
...
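
The rename from gc_will_visit() to gc_visited() also flips the comparison, which is easy to miss in the hunk above. A reduced sketch (gc_pos collapsed to a plain integer, the seqcount retry loop omitted; purely illustrative):

/* Reduced model of the predicate change; not the real gc_pos type. */
struct fs { int gc_pos; };

static int gc_pos_cmp(int l, int r) { return (l > r) - (l < r); }

/* Before: "gc has not reached pos yet, so it will still visit it". */
static _Bool gc_will_visit(struct fs *c, int pos)
{
	return gc_pos_cmp(c->gc_pos, pos) < 0;
}

/* After: "gc has already passed pos" - arguments swapped and <= instead
 * of <, so a mark landing exactly at the current gc position must now
 * also be replayed into the gc copy of the marks (BCH_BUCKET_MARK_GC). */
static _Bool gc_visited(struct fs *c, int pos)
{
	return gc_pos_cmp(pos, c->gc_pos) <= 0;
}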
@@ -160,7 +160,6 @@ static void bch2_btree_node_free_index(struct btree_update *as, struct btree *b,
 {
 	struct bch_fs *c = as->c;
 	struct pending_btree_node_free *d;
-	unsigned replicas;

 	/*
 	 * btree_update lock is only needed here to avoid racing with
@@ -178,15 +177,6 @@ static void bch2_btree_node_free_index(struct btree_update *as, struct btree *b,
 	BUG_ON(d->index_update_done);
 	d->index_update_done = true;

-	/*
-	 * Btree nodes are accounted as freed in bch_alloc_stats when they're
-	 * freed from the index:
-	 */
-	replicas = bch2_extent_nr_dirty_ptrs(k);
-	if (replicas)
-		stats->replicas[replicas - 1].data[BCH_DATA_BTREE] -=
-			c->opts.btree_node_size * replicas;
-
 	/*
 	 * We're dropping @k from the btree, but it's still live until the
 	 * index update is persistent so we need to keep a reference around for
@@ -208,15 +198,16 @@ static void bch2_btree_node_free_index(struct btree_update *as, struct btree *b,
 	 * bch2_mark_key() compares the current gc pos to the pos we're
 	 * moving this reference from, hence one comparison here:
 	 */
-	if (gc_pos_cmp(c->gc_pos, gc_phase(GC_PHASE_PENDING_DELETE)) < 0) {
-		struct bch_fs_usage tmp = { 0 };
+	if (gc_pos_cmp(c->gc_pos, b
+		       ? gc_pos_btree_node(b)
+		       : gc_pos_btree_root(as->btree_id)) >= 0 &&
+	    gc_pos_cmp(c->gc_pos, gc_phase(GC_PHASE_PENDING_DELETE)) < 0) {
+		struct gc_pos pos = { 0 };

 		bch2_mark_key(c, BKEY_TYPE_BTREE,
 			      bkey_i_to_s_c(&d->key),
-			      false, 0, b
-			      ? gc_pos_btree_node(b)
-			      : gc_pos_btree_root(as->btree_id),
-			      &tmp, 0, 0);
+			      false, 0, pos,
+			      NULL, 0, BCH_BUCKET_MARK_GC);

 		/*
 		 * Don't apply tmp - pending deletes aren't tracked in
 		 * bch_alloc_stats:
@@ -287,19 +278,13 @@ void bch2_btree_node_free_inmem(struct bch_fs *c, struct btree *b,
 static void bch2_btree_node_free_ondisk(struct bch_fs *c,
					struct pending_btree_node_free *pending)
 {
-	struct bch_fs_usage stats = { 0 };
-
 	BUG_ON(!pending->index_update_done);

 	bch2_mark_key(c, BKEY_TYPE_BTREE,
 		      bkey_i_to_s_c(&pending->key),
 		      false, 0,
 		      gc_phase(GC_PHASE_PENDING_DELETE),
-		      &stats, 0, 0);
-
-	/*
-	 * Don't apply stats - pending deletes aren't tracked in
-	 * bch_alloc_stats:
-	 */
+		      NULL, 0, 0);
 }

 static struct btree *__bch2_btree_node_alloc(struct bch_fs *c,
@@ -1939,6 +1924,25 @@ static void __bch2_btree_node_update_key(struct bch_fs *c,

 	btree_interior_update_add_node_reference(as, b);

+	/*
+	 * XXX: the rest of the update path treats this like we're actually
+	 * inserting a new node and deleting the existing node, so the
+	 * reservation needs to include enough space for @b
+	 *
+	 * that is actually sketch as fuck though and I am surprised the code
+	 * seems to work like that, definitely need to go back and rework it
+	 * into something saner.
+	 *
+	 * (I think @b is just getting double counted until the btree update
+	 * finishes and "deletes" @b on disk)
+	 */
+	ret = bch2_disk_reservation_add(c, &as->reserve->disk_res,
+			c->opts.btree_node_size *
+			bch2_extent_nr_ptrs(extent_i_to_s_c(new_key)),
+			BCH_DISK_RESERVATION_NOFAIL|
+			BCH_DISK_RESERVATION_GC_LOCK_HELD);
+	BUG_ON(ret);
+
 	parent = btree_node_parent(iter, b);
 	if (parent) {
 		if (new_hash) {
...
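To put rough numbers on the double counting the XXX comment describes (illustrative values only, not from the tree): with a btree node of 512 sectors carried by 2 pointers, the reservation added here is 512 * 2 = 1024 sectors, held on top of the space @b itself still occupies until the update completes and @b is freed on disk.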
[diff collapsed]
@@ -29,23 +29,34 @@
 	_old;							\
 })

-static inline struct bucket_array *bucket_array(struct bch_dev *ca)
+static inline struct bucket_array *__bucket_array(struct bch_dev *ca,
+						  bool gc)
 {
-	return rcu_dereference_check(ca->buckets,
+	return rcu_dereference_check(ca->buckets[gc],
 				     !ca->fs ||
 				     percpu_rwsem_is_held(&ca->fs->usage_lock) ||
 				     lockdep_is_held(&ca->fs->gc_lock) ||
 				     lockdep_is_held(&ca->bucket_lock));
 }

-static inline struct bucket *bucket(struct bch_dev *ca, size_t b)
+static inline struct bucket_array *bucket_array(struct bch_dev *ca)
+{
+	return __bucket_array(ca, false);
+}
+
+static inline struct bucket *__bucket(struct bch_dev *ca, size_t b, bool gc)
 {
-	struct bucket_array *buckets = bucket_array(ca);
+	struct bucket_array *buckets = __bucket_array(ca, gc);

 	BUG_ON(b < buckets->first_bucket || b >= buckets->nbuckets);
 	return buckets->b + b;
 }

+static inline struct bucket *bucket(struct bch_dev *ca, size_t b)
+{
+	return __bucket(ca, b, false);
+}
+
 static inline void bucket_io_clock_reset(struct bch_fs *c, struct bch_dev *ca,
					 size_t b, int rw)
 {
@@ -129,7 +140,7 @@ static inline bool bucket_unused(struct bucket_mark mark)
 /* Device usage: */

-struct bch_dev_usage __bch2_dev_usage_read(struct bch_dev *);
+struct bch_dev_usage __bch2_dev_usage_read(struct bch_dev *, bool);
 struct bch_dev_usage bch2_dev_usage_read(struct bch_fs *, struct bch_dev *);

 static inline u64 __dev_buckets_available(struct bch_dev *ca,
@@ -168,7 +179,7 @@ static inline u64 dev_buckets_free(struct bch_fs *c, struct bch_dev *ca)
 /* Filesystem usage: */

-struct bch_fs_usage __bch2_fs_usage_read(struct bch_fs *);
+struct bch_fs_usage __bch2_fs_usage_read(struct bch_fs *, bool);
 struct bch_fs_usage bch2_fs_usage_read(struct bch_fs *);

 void bch2_fs_usage_apply(struct bch_fs *, struct bch_fs_usage *,
			 struct disk_reservation *, struct gc_pos);
@@ -207,17 +218,13 @@ void bch2_mark_metadata_bucket(struct bch_fs *, struct bch_dev *,
			       struct gc_pos, unsigned);

 #define BCH_BUCKET_MARK_NOATOMIC		(1 << 0)
-#define BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE	(1 << 1)
-#define BCH_BUCKET_MARK_GC_WILL_VISIT		(1 << 2)
-#define BCH_BUCKET_MARK_GC_LOCK_HELD		(1 << 3)
+#define BCH_BUCKET_MARK_GC			(1 << 1)

 void bch2_mark_key(struct bch_fs *, enum bkey_type, struct bkey_s_c,
		   bool, s64, struct gc_pos,
		   struct bch_fs_usage *, u64, unsigned);
 void bch2_mark_update(struct btree_insert *, struct btree_insert_entry *);

-void bch2_recalc_sectors_available(struct bch_fs *);
-
 void __bch2_disk_reservation_put(struct bch_fs *, struct disk_reservation *);

 static inline void bch2_disk_reservation_put(struct bch_fs *c,
...
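
The three old flags collapse into one: BCH_BUCKET_MARK_GC now simply selects which of the two mark sets an update applies to. A plausible dispatch pattern (an assumption about the collapsed buckets.c, which is not shown on this page):

/* Sketch: the flag indexes the second set of marks; illustrative only. */
#define BCH_BUCKET_MARK_NOATOMIC	(1 << 0)
#define BCH_BUCKET_MARK_GC		(1 << 1)

static inline unsigned mark_set_idx(unsigned flags)
{
	/* 0 = runtime marks, 1 = gc marks */
	return (flags & BCH_BUCKET_MARK_GC) ? 1 : 0;
}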
@@ -64,8 +64,6 @@ struct bch_dev_usage {
 struct bch_fs_usage {
	/* all fields are in units of 512 byte sectors: */

-	u64			online_reserved;
-	u64			available_cache;
-
	struct {
		u64		data[BCH_DATA_NR];
@@ -74,6 +72,10 @@ struct bch_fs_usage {
	} replicas[BCH_REPLICAS_MAX];

	u64			buckets[BCH_DATA_NR];
+
+	/* fields starting here aren't touched by gc: */
+	u64			online_reserved;
+	u64			available_cache;
 };

 /*
...
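
Moving online_reserved and available_cache below the new comment is what makes this layout useful: everything gc owns is now a contiguous prefix of the struct, so gc can clear or compare just that prefix in one bounded operation. A hedged sketch with stand-in fields:

#include <stddef.h>
#include <string.h>

/* Stand-in for struct bch_fs_usage; field order mirrors the diff,
 * member types and counts are illustrative. */
struct fs_usage {
	unsigned long long data[4];
	unsigned long long buckets[4];
	/* fields starting here aren't touched by gc: */
	unsigned long long online_reserved;
	unsigned long long available_cache;
};

/* gc resets only the prefix it owns: */
static void gc_clear_usage(struct fs_usage *u)
{
	memset(u, 0, offsetof(struct fs_usage, online_reserved));
}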
@@ -782,9 +782,7 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
		bch2_mark_metadata_bucket(c, ca, bucket, BCH_DATA_JOURNAL,
					  ca->mi.bucket_size,
					  gc_phase(GC_PHASE_SB),
-					  new_fs
-					  ? BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE
-					  : 0);
+					  0);

		if (c) {
			spin_unlock(&c->journal.lock);
...
@@ -374,7 +374,7 @@ static void bch2_fs_free(struct bch_fs *c)
	bch2_io_clock_exit(&c->io_clock[READ]);
	bch2_fs_compress_exit(c);
	percpu_free_rwsem(&c->usage_lock);
-	free_percpu(c->usage_percpu);
+	free_percpu(c->usage[0]);
	mempool_exit(&c->btree_iters_pool);
	mempool_exit(&c->btree_bounce_pool);
	bioset_exit(&c->btree_bio);
@@ -606,7 +606,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
			max(offsetof(struct btree_read_bio, bio),
			    offsetof(struct btree_write_bio, wbio.bio)),
			BIOSET_NEED_BVECS) ||
-	    !(c->usage_percpu = alloc_percpu(struct bch_fs_usage)) ||
+	    !(c->usage[0] = alloc_percpu(struct bch_fs_usage)) ||
	    percpu_init_rwsem(&c->usage_lock) ||
	    mempool_init_kvpmalloc_pool(&c->btree_bounce_pool, 1,
					btree_bytes(c)) ||
@@ -1028,8 +1028,7 @@ static int bch2_dev_attach_bdev(struct bch_fs *c, struct bch_sb_handle *sb)
		return ret;

	mutex_lock(&c->sb_lock);
-	bch2_mark_dev_superblock(ca->fs, ca,
-				 BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE);
+	bch2_mark_dev_superblock(ca->fs, ca, 0);
	mutex_unlock(&c->sb_lock);

	bch2_dev_sysfs_online(c, ca);
@@ -1314,7 +1313,7 @@ static void dev_usage_clear(struct bch_dev *ca)
	for_each_possible_cpu(cpu) {
		struct bch_dev_usage *p =
-			per_cpu_ptr(ca->usage_percpu, cpu);
+			per_cpu_ptr(ca->usage[0], cpu);
		memset(p, 0, sizeof(*p));
	}
@@ -1375,8 +1374,7 @@ int bch2_dev_add(struct bch_fs *c, const char *path)
	 * allocate the journal, reset all the marks, then remark after we
	 * attach...
	 */
-	bch2_mark_dev_superblock(ca->fs, ca,
-				 BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE);
+	bch2_mark_dev_superblock(ca->fs, ca, 0);

	err = "journal alloc failed";
	ret = bch2_dev_journal_alloc(ca);
@@ -1435,8 +1433,7 @@ int bch2_dev_add(struct bch_fs *c, const char *path)
	ca->disk_sb.sb->dev_idx = dev_idx;
	bch2_dev_attach(c, ca, dev_idx);

-	bch2_mark_dev_superblock(c, ca,
-				 BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE);
+	bch2_mark_dev_superblock(c, ca, 0);

	bch2_write_super(c);
	mutex_unlock(&c->sb_lock);
...
@@ -478,7 +478,7 @@ STORE(__bch2_fs)
		bch2_coalesce(c);

	if (attr == &sysfs_trigger_gc)
-		bch2_gc(c);
+		bch2_gc(c, NULL, false);

	if (attr == &sysfs_prune_cache) {
		struct shrink_control sc;
...