Commit 9ca53b55 authored by Kent Overstreet, committed by Kent Overstreet

bcachefs: gc now operates on second set of bucket marks

This means we can now use gc to verify the allocation information -
important for testing persistent alloc info

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent e6473691
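The core of the change: each device now carries two sets of bucket marks, buckets[0] (the live set) and buckets[1] (the set gc rebuilds), selected by the bool argument to __bucket_array() further down in this diff. That is what lets gc verify the live allocation information. A minimal sketch of the comparison this enables, assuming struct bucket's mark field can be compared bytewise; verify_bucket_marks() is a hypothetical name (the real check is in the collapsed btree_gc.c diff), and the caller is assumed to hold one of the locks __bucket_array() checks for:

static void verify_bucket_marks(struct bch_dev *ca)
{
	struct bucket_array *live = __bucket_array(ca, false);
	struct bucket_array *gc   = __bucket_array(ca, true);
	size_t b;

	for (b = live->first_bucket; b < live->nbuckets; b++) {
		/* compare the live mark against the mark gc computed: */
		struct bucket_mark l = live->b[b].mark;
		struct bucket_mark g = gc->b[b].mark;

		if (memcmp(&l, &g, sizeof(l)))
			bch_err(ca, "bucket %zu: gc marks != live marks", b);
	}
}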
......@@ -930,12 +930,6 @@ static int bch2_allocator_thread(void *arg)
pr_debug("free_inc now empty");
do {
if (test_bit(BCH_FS_GC_FAILURE, &c->flags)) {
up_read(&c->gc_lock);
bch_err(ca, "gc failure");
goto stop;
}
/*
* Find some buckets that we can invalidate, either
* they're completely unused, or only contain clean data
......@@ -1293,9 +1287,6 @@ static int __bch2_fs_allocator_start(struct bch_fs *c)
bool invalidating_data = false;
int ret = 0;
if (test_bit(BCH_FS_GC_FAILURE, &c->flags))
return -1;
if (test_alloc_startup(c)) {
invalidating_data = true;
goto not_enough;
......@@ -1321,9 +1312,7 @@ static int __bch2_fs_allocator_start(struct bch_fs *c)
continue;
bch2_mark_alloc_bucket(c, ca, bu, true,
gc_pos_alloc(c, NULL),
BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE|
BCH_BUCKET_MARK_GC_LOCK_HELD);
gc_pos_alloc(c, NULL), 0);
fifo_push(&ca->free_inc, bu);
......
......@@ -347,7 +347,6 @@ enum gc_phase {
GC_PHASE_PENDING_DELETE,
GC_PHASE_ALLOC,
GC_PHASE_DONE
};
struct gc_pos {
......@@ -392,15 +391,14 @@ struct bch_dev {
* gc_lock, for device resize - holding any is sufficient for access:
* Or rcu_read_lock(), but only for ptr_stale():
*/
struct bucket_array __rcu *buckets;
struct bucket_array __rcu *buckets[2];
unsigned long *buckets_dirty;
unsigned long *buckets_written;
/* most out of date gen in the btree */
u8 *oldest_gens;
struct rw_semaphore bucket_lock;
struct bch_dev_usage __percpu *usage_percpu;
struct bch_dev_usage usage_cached;
struct bch_dev_usage __percpu *usage[2];
/* Allocator: */
struct task_struct __rcu *alloc_thread;
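struct bch_dev's usage counters are split the same way: usage[0] is the live percpu set, usage[1] gc's copy, and readers pick one via the new bool parameter on __bch2_dev_usage_read() (declared later in this diff). A sketch of the read side, under the assumption that bch_dev_usage is a flat array of u64 counters summed across CPUs; the literal implementation may differ:

struct bch_dev_usage __bch2_dev_usage_read(struct bch_dev *ca, bool gc)
{
	struct bch_dev_usage ret;
	u64 *dst = (u64 *) &ret;
	unsigned i;
	int cpu;

	memset(&ret, 0, sizeof(ret));

	/* sum the requested set's per-cpu counters (0 = live, 1 = gc): */
	for_each_possible_cpu(cpu) {
		u64 *src = (u64 *) per_cpu_ptr(ca->usage[gc], cpu);

		for (i = 0; i < sizeof(ret) / sizeof(u64); i++)
			dst[i] += src[i];
	}

	return ret;
}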
......@@ -478,7 +476,6 @@ enum {
/* errors: */
BCH_FS_ERROR,
BCH_FS_GC_FAILURE,
/* misc: */
BCH_FS_BDEV_MOUNTED,
......@@ -614,8 +611,8 @@ struct bch_fs {
atomic64_t sectors_available;
struct bch_fs_usage __percpu *usage_percpu;
struct bch_fs_usage usage_cached;
struct bch_fs_usage __percpu *usage[2];
struct percpu_rw_semaphore usage_lock;
struct closure_waitlist freelist_wait;
......@@ -656,9 +653,6 @@ struct bch_fs {
*
* gc_cur_phase is a superset of btree_ids (BTREE_ID_EXTENTS etc.)
*
* gc_cur_phase == GC_PHASE_DONE indicates that gc is finished/not
* currently running, and gc marks are currently valid
*
* Protected by gc_pos_lock. Only written to by GC thread, so GC thread
* can read without a lock.
*/
......
This diff is collapsed.
......@@ -7,7 +7,7 @@
enum bkey_type;
void bch2_coalesce(struct bch_fs *);
void bch2_gc(struct bch_fs *);
int bch2_gc(struct bch_fs *, struct list_head *, bool);
void bch2_gc_thread_stop(struct bch_fs *);
int bch2_gc_thread_start(struct bch_fs *);
int bch2_initial_gc(struct bch_fs *, struct list_head *);
......@@ -105,14 +105,14 @@ static inline struct gc_pos gc_pos_alloc(struct bch_fs *c, struct open_bucket *o
};
}
static inline bool gc_will_visit(struct bch_fs *c, struct gc_pos pos)
static inline bool gc_visited(struct bch_fs *c, struct gc_pos pos)
{
unsigned seq;
bool ret;
do {
seq = read_seqcount_begin(&c->gc_pos_lock);
ret = gc_pos_cmp(c->gc_pos, pos) < 0;
ret = gc_pos_cmp(pos, c->gc_pos) <= 0;
} while (read_seqcount_retry(&c->gc_pos_lock, seq));
return ret;
......
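The rename flips the predicate's sense: gc_will_visit() answered "will gc get to this position later?", while gc_visited() answers "has gc already passed it?" (note pos and c->gc_pos swap places and the comparison becomes <=). When it returns true, a caller updating bucket marks must mirror the update into the gc copy, since gc will not see it again. A sketch of that caller pattern, with c, pos, type, k, inserting, sectors and flags as placeholder variables:

	/* gc already walked past @pos: apply the same mark to the gc set
	 * so the end-of-gc comparison stays valid */
	if (gc_visited(c, pos))
		bch2_mark_key(c, type, k, inserting, sectors, pos,
			      NULL, 0, flags | BCH_BUCKET_MARK_GC);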
......@@ -160,7 +160,6 @@ static void bch2_btree_node_free_index(struct btree_update *as, struct btree *b,
{
struct bch_fs *c = as->c;
struct pending_btree_node_free *d;
unsigned replicas;
/*
* btree_update lock is only needed here to avoid racing with
......@@ -178,15 +177,6 @@ static void bch2_btree_node_free_index(struct btree_update *as, struct btree *b,
BUG_ON(d->index_update_done);
d->index_update_done = true;
/*
* Btree nodes are accounted as freed in bch_alloc_stats when they're
* freed from the index:
*/
replicas = bch2_extent_nr_dirty_ptrs(k);
if (replicas)
stats->replicas[replicas - 1].data[BCH_DATA_BTREE] -=
c->opts.btree_node_size * replicas;
/*
* We're dropping @k from the btree, but it's still live until the
* index update is persistent so we need to keep a reference around for
......@@ -208,15 +198,16 @@ static void bch2_btree_node_free_index(struct btree_update *as, struct btree *b,
* bch2_mark_key() compares the current gc pos to the pos we're
* moving this reference from, hence one comparison here:
*/
if (gc_pos_cmp(c->gc_pos, gc_phase(GC_PHASE_PENDING_DELETE)) < 0) {
struct bch_fs_usage tmp = { 0 };
if (gc_pos_cmp(c->gc_pos, b
? gc_pos_btree_node(b)
: gc_pos_btree_root(as->btree_id)) >= 0 &&
gc_pos_cmp(c->gc_pos, gc_phase(GC_PHASE_PENDING_DELETE)) < 0) {
struct gc_pos pos = { 0 };
bch2_mark_key(c, BKEY_TYPE_BTREE,
bkey_i_to_s_c(&d->key),
false, 0, b
? gc_pos_btree_node(b)
: gc_pos_btree_root(as->btree_id),
&tmp, 0, 0);
false, 0, pos,
NULL, 0, BCH_BUCKET_MARK_GC);
/*
* Don't apply tmp - pending deletes aren't tracked in
* bch_alloc_stats:
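The widened condition above is easy to misread. One plausible reading: when gc has already walked past the node, the key is present in gc's marks from the btree walk, and gc's upcoming GC_PHASE_PENDING_DELETE pass will mark the pending delete again, so the walk-time mark is dropped here to avoid double counting; once gc is past that phase no compensation is needed. Mechanically, the condition restated with hypothetical helper variables:

	struct gc_pos node_pos = b
		? gc_pos_btree_node(b)
		: gc_pos_btree_root(as->btree_id);
	bool gc_saw_old_key  = gc_pos_cmp(c->gc_pos, node_pos) >= 0;
	bool gc_past_pending = gc_pos_cmp(c->gc_pos,
				gc_phase(GC_PHASE_PENDING_DELETE)) >= 0;

	if (gc_saw_old_key && !gc_past_pending) {
		/* drop @d->key from the gc marks via BCH_BUCKET_MARK_GC */
	}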
......@@ -287,19 +278,13 @@ void bch2_btree_node_free_inmem(struct bch_fs *c, struct btree *b,
static void bch2_btree_node_free_ondisk(struct bch_fs *c,
struct pending_btree_node_free *pending)
{
struct bch_fs_usage stats = { 0 };
BUG_ON(!pending->index_update_done);
bch2_mark_key(c, BKEY_TYPE_BTREE,
bkey_i_to_s_c(&pending->key),
false, 0,
gc_phase(GC_PHASE_PENDING_DELETE),
&stats, 0, 0);
/*
* Don't apply stats - pending deletes aren't tracked in
* bch_alloc_stats:
*/
NULL, 0, 0);
}
static struct btree *__bch2_btree_node_alloc(struct bch_fs *c,
......@@ -1939,6 +1924,25 @@ static void __bch2_btree_node_update_key(struct bch_fs *c,
btree_interior_update_add_node_reference(as, b);
/*
* XXX: the rest of the update path treats this like we're actually
* inserting a new node and deleting the existing node, so the
* reservation needs to include enough space for @b
*
* that is actually sketch as fuck though and I am surprised the code
* seems to work like that, definitely need to go back and rework it
* into something saner.
*
* (I think @b is just getting double counted until the btree update
* finishes and "deletes" @b on disk)
*/
ret = bch2_disk_reservation_add(c, &as->reserve->disk_res,
c->opts.btree_node_size *
bch2_extent_nr_ptrs(extent_i_to_s_c(new_key)),
BCH_DISK_RESERVATION_NOFAIL|
BCH_DISK_RESERVATION_GC_LOCK_HELD);
BUG_ON(ret);
parent = btree_node_parent(iter, b);
if (parent) {
if (new_hash) {
......
This diff is collapsed.
......@@ -29,23 +29,34 @@
_old; \
})
static inline struct bucket_array *bucket_array(struct bch_dev *ca)
static inline struct bucket_array *__bucket_array(struct bch_dev *ca,
bool gc)
{
return rcu_dereference_check(ca->buckets,
return rcu_dereference_check(ca->buckets[gc],
!ca->fs ||
percpu_rwsem_is_held(&ca->fs->usage_lock) ||
lockdep_is_held(&ca->fs->gc_lock) ||
lockdep_is_held(&ca->bucket_lock));
}
static inline struct bucket *bucket(struct bch_dev *ca, size_t b)
static inline struct bucket_array *bucket_array(struct bch_dev *ca)
{
return __bucket_array(ca, false);
}
static inline struct bucket *__bucket(struct bch_dev *ca, size_t b, bool gc)
{
struct bucket_array *buckets = bucket_array(ca);
struct bucket_array *buckets = __bucket_array(ca, gc);
BUG_ON(b < buckets->first_bucket || b >= buckets->nbuckets);
return buckets->b + b;
}
static inline struct bucket *bucket(struct bch_dev *ca, size_t b)
{
return __bucket(ca, b, false);
}
static inline void bucket_io_clock_reset(struct bch_fs *c, struct bch_dev *ca,
size_t b, int rw)
{
......@@ -129,7 +140,7 @@ static inline bool bucket_unused(struct bucket_mark mark)
/* Device usage: */
struct bch_dev_usage __bch2_dev_usage_read(struct bch_dev *);
struct bch_dev_usage __bch2_dev_usage_read(struct bch_dev *, bool);
struct bch_dev_usage bch2_dev_usage_read(struct bch_fs *, struct bch_dev *);
static inline u64 __dev_buckets_available(struct bch_dev *ca,
......@@ -168,7 +179,7 @@ static inline u64 dev_buckets_free(struct bch_fs *c, struct bch_dev *ca)
/* Filesystem usage: */
struct bch_fs_usage __bch2_fs_usage_read(struct bch_fs *);
struct bch_fs_usage __bch2_fs_usage_read(struct bch_fs *, bool);
struct bch_fs_usage bch2_fs_usage_read(struct bch_fs *);
void bch2_fs_usage_apply(struct bch_fs *, struct bch_fs_usage *,
struct disk_reservation *, struct gc_pos);
......@@ -207,17 +218,13 @@ void bch2_mark_metadata_bucket(struct bch_fs *, struct bch_dev *,
struct gc_pos, unsigned);
#define BCH_BUCKET_MARK_NOATOMIC (1 << 0)
#define BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE (1 << 1)
#define BCH_BUCKET_MARK_GC_WILL_VISIT (1 << 2)
#define BCH_BUCKET_MARK_GC_LOCK_HELD (1 << 3)
#define BCH_BUCKET_MARK_GC (1 << 1)
void bch2_mark_key(struct bch_fs *, enum bkey_type, struct bkey_s_c,
bool, s64, struct gc_pos,
struct bch_fs_usage *, u64, unsigned);
void bch2_mark_update(struct btree_insert *, struct btree_insert_entry *);
void bch2_recalc_sectors_available(struct bch_fs *);
void __bch2_disk_reservation_put(struct bch_fs *, struct disk_reservation *);
static inline void bch2_disk_reservation_put(struct bch_fs *c,
......
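Net effect on the flag namespace: BCH_BUCKET_MARK_NOATOMIC survives, and MAY_MAKE_UNAVAILABLE / GC_WILL_VISIT / GC_LOCK_HELD collapse into a single BCH_BUCKET_MARK_GC that routes the update into the second set of marks. Before/after at a call site, taken from the btree_update_interior.c hunk earlier in this diff:

	/* old: per-call stats struct, gc behavior via extra flags */
	bch2_mark_key(c, BKEY_TYPE_BTREE, bkey_i_to_s_c(&d->key),
		      false, 0, gc_pos_btree_node(b),
		      &tmp, 0, 0);

	/* new: stats may be NULL; BCH_BUCKET_MARK_GC selects
	 * buckets[1] / usage[1] instead of the live set */
	bch2_mark_key(c, BKEY_TYPE_BTREE, bkey_i_to_s_c(&d->key),
		      false, 0, pos,
		      NULL, 0, BCH_BUCKET_MARK_GC);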
......@@ -64,8 +64,6 @@ struct bch_dev_usage {
struct bch_fs_usage {
/* all fields are in units of 512 byte sectors: */
u64 online_reserved;
u64 available_cache;
struct {
u64 data[BCH_DATA_NR];
......@@ -74,6 +72,10 @@ struct bch_fs_usage {
} replicas[BCH_REPLICAS_MAX];
u64 buckets[BCH_DATA_NR];
/* fields starting here aren't touched by gc: */
u64 online_reserved;
u64 available_cache;
};
/*
......
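Moving online_reserved and available_cache below the "fields starting here aren't touched by gc" marker means the gc-tracked counters form a contiguous prefix of struct bch_fs_usage, so verification can compare that prefix and ignore fields gc doesn't recompute. A hypothetical sketch of the check this layout enables:

	struct bch_fs_usage live = __bch2_fs_usage_read(c, false);
	struct bch_fs_usage gc   = __bch2_fs_usage_read(c, true);

	/* compare only up to the first field gc doesn't touch: */
	if (memcmp(&live, &gc, offsetof(struct bch_fs_usage, online_reserved)))
		bch_err(c, "fs usage does not match after gc");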
......@@ -782,9 +782,7 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
bch2_mark_metadata_bucket(c, ca, bucket, BCH_DATA_JOURNAL,
ca->mi.bucket_size,
gc_phase(GC_PHASE_SB),
new_fs
? BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE
: 0);
0);
if (c) {
spin_unlock(&c->journal.lock);
......
......@@ -374,7 +374,7 @@ static void bch2_fs_free(struct bch_fs *c)
bch2_io_clock_exit(&c->io_clock[READ]);
bch2_fs_compress_exit(c);
percpu_free_rwsem(&c->usage_lock);
free_percpu(c->usage_percpu);
free_percpu(c->usage[0]);
mempool_exit(&c->btree_iters_pool);
mempool_exit(&c->btree_bounce_pool);
bioset_exit(&c->btree_bio);
......@@ -606,7 +606,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
max(offsetof(struct btree_read_bio, bio),
offsetof(struct btree_write_bio, wbio.bio)),
BIOSET_NEED_BVECS) ||
!(c->usage_percpu = alloc_percpu(struct bch_fs_usage)) ||
!(c->usage[0] = alloc_percpu(struct bch_fs_usage)) ||
percpu_init_rwsem(&c->usage_lock) ||
mempool_init_kvpmalloc_pool(&c->btree_bounce_pool, 1,
btree_bytes(c)) ||
......@@ -1028,8 +1028,7 @@ static int bch2_dev_attach_bdev(struct bch_fs *c, struct bch_sb_handle *sb)
return ret;
mutex_lock(&c->sb_lock);
bch2_mark_dev_superblock(ca->fs, ca,
BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE);
bch2_mark_dev_superblock(ca->fs, ca, 0);
mutex_unlock(&c->sb_lock);
bch2_dev_sysfs_online(c, ca);
......@@ -1314,7 +1313,7 @@ static void dev_usage_clear(struct bch_dev *ca)
for_each_possible_cpu(cpu) {
struct bch_dev_usage *p =
per_cpu_ptr(ca->usage_percpu, cpu);
per_cpu_ptr(ca->usage[0], cpu);
memset(p, 0, sizeof(*p));
}
......@@ -1375,8 +1374,7 @@ int bch2_dev_add(struct bch_fs *c, const char *path)
* allocate the journal, reset all the marks, then remark after we
* attach...
*/
bch2_mark_dev_superblock(ca->fs, ca,
BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE);
bch2_mark_dev_superblock(ca->fs, ca, 0);
err = "journal alloc failed";
ret = bch2_dev_journal_alloc(ca);
......@@ -1435,8 +1433,7 @@ int bch2_dev_add(struct bch_fs *c, const char *path)
ca->disk_sb.sb->dev_idx = dev_idx;
bch2_dev_attach(c, ca, dev_idx);
bch2_mark_dev_superblock(c, ca,
BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE);
bch2_mark_dev_superblock(c, ca, 0);
bch2_write_super(c);
mutex_unlock(&c->sb_lock);
......
......@@ -478,7 +478,7 @@ STORE(__bch2_fs)
bch2_coalesce(c);
if (attr == &sysfs_trigger_gc)
bch2_gc(c);
bch2_gc(c, NULL, false);
if (attr == &sysfs_prune_cache) {
struct shrink_control sc;
......
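bch2_gc() now returns an int (per the btree_gc.h hunk) instead of flagging failure through BCH_FS_GC_FAILURE, which this commit removes along with its checks in the allocator paths. The extra arguments are passed as NULL and false here; from the call sites they appear to be a journal-keys list and an "initial gc" flag, though the collapsed btree_gc.c diff is where that is defined. Callers can now handle failure directly, e.g.:

	int ret = bch2_gc(c, NULL, false);
	if (ret)
		bch_err(c, "gc failed: %i", ret);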