Commit 61274e9d authored by Kent Overstreet, committed by Kent Overstreet

bcachefs: Allocator startup improvements

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent a9ec3454
@@ -237,6 +237,7 @@ static int __bch2_alloc_write_key(struct bch_fs *c, struct bch_dev *ca,
 	__BKEY_PADDED(k, DIV_ROUND_UP(sizeof(struct bch_alloc), 8)) alloc_key;
 	struct bucket *g;
 	struct bkey_i_alloc *a;
+	int ret;
 	u8 *d;
 
 	percpu_down_read(&c->usage_lock);
@@ -260,32 +261,50 @@ static int __bch2_alloc_write_key(struct bch_fs *c, struct bch_dev *ca,
 	bch2_btree_iter_set_pos(iter, a->k.p);
 
-	return bch2_btree_insert_at(c, NULL, journal_seq,
-				    BTREE_INSERT_NOFAIL|
-				    BTREE_INSERT_USE_RESERVE|
-				    BTREE_INSERT_USE_ALLOC_RESERVE|
-				    flags,
-				    BTREE_INSERT_ENTRY(iter, &a->k_i));
+	ret = bch2_btree_insert_at(c, NULL, journal_seq,
+				   BTREE_INSERT_NOFAIL|
+				   BTREE_INSERT_USE_RESERVE|
+				   BTREE_INSERT_USE_ALLOC_RESERVE|
+				   flags,
+				   BTREE_INSERT_ENTRY(iter, &a->k_i));
+	if (!ret && ca->buckets_written)
+		set_bit(b, ca->buckets_written);
+
+	return ret;
 }
 
-int bch2_alloc_replay_key(struct bch_fs *c, struct bpos pos)
+int bch2_alloc_replay_key(struct bch_fs *c, struct bkey_i *k)
 {
 	struct bch_dev *ca;
 	struct btree_iter iter;
 	int ret;
 
-	if (pos.inode >= c->sb.nr_devices || !c->devs[pos.inode])
+	if (k->k.p.inode >= c->sb.nr_devices ||
+	    !c->devs[k->k.p.inode])
 		return 0;
 
-	ca = bch_dev_bkey_exists(c, pos.inode);
+	ca = bch_dev_bkey_exists(c, k->k.p.inode);
 
-	if (pos.offset >= ca->mi.nbuckets)
+	if (k->k.p.offset >= ca->mi.nbuckets)
 		return 0;
 
-	bch2_btree_iter_init(&iter, c, BTREE_ID_ALLOC, POS_MIN,
-			     BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
+	bch2_btree_iter_init(&iter, c, BTREE_ID_ALLOC, k->k.p,
+			     BTREE_ITER_INTENT);
 
-	ret = __bch2_alloc_write_key(c, ca, pos.offset, &iter, NULL, 0);
+	ret = bch2_btree_iter_traverse(&iter);
+	if (ret)
+		goto err;
+
+	/* check buckets_written with btree node locked: */
+
+	ret = test_bit(k->k.p.offset, ca->buckets_written)
+		? 0
+		: bch2_btree_insert_at(c, NULL, NULL,
+				       BTREE_INSERT_NOFAIL|
+				       BTREE_INSERT_JOURNAL_REPLAY,
+				       BTREE_INSERT_ENTRY(&iter, k));
+err:
 	bch2_btree_iter_unlock(&iter);
 	return ret;
 }
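The two hunks above introduce the ca->buckets_written bitmap: __bch2_alloc_write_key sets a bucket's bit once its alloc key has been written to the btree, and bch2_alloc_replay_key tests that bit with the btree node locked, so journal replay does not overwrite a newer key that already reached disk. A minimal userspace sketch of that idiom follows; the struct and helper names are hypothetical, and the bit helpers merely stand in for the kernel's set_bit()/test_bit():

#include <limits.h>
#include <stddef.h>

#define BITS_PER_LONG     (sizeof(unsigned long) * CHAR_BIT)
#define BITS_TO_LONGS(n)  (((n) + BITS_PER_LONG - 1) / BITS_PER_LONG)

struct dev_state {
        size_t         nbuckets;
        /* allocated as calloc(BITS_TO_LONGS(nbuckets), sizeof(unsigned long)) */
        unsigned long  *buckets_written;        /* one bit per bucket */
};

static void mark_written(struct dev_state *d, size_t b)
{
        d->buckets_written[b / BITS_PER_LONG] |= 1UL << (b % BITS_PER_LONG);
}

static int already_written(const struct dev_state *d, size_t b)
{
        return (d->buckets_written[b / BITS_PER_LONG] >> (b % BITS_PER_LONG)) & 1;
}

/* write path: after the btree update for bucket b succeeds, remember it */
static int write_alloc_key(struct dev_state *d, size_t b)
{
        int ret = 0;            /* stands in for the actual btree insert */

        if (!ret)
                mark_written(d, b);
        return ret;
}

/* replay path: a journalled key for an already-written bucket is stale */
static int replay_alloc_key(struct dev_state *d, size_t b)
{
        if (already_written(d, b))
                return 0;       /* skip: newer data already on disk */
        return 0;               /* otherwise insert the journalled key */
}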
@@ -1284,51 +1303,49 @@ static int __bch2_fs_allocator_start(struct bch_fs *c)
 	/* Scan for buckets that are already invalidated: */
 	for_each_rw_member(ca, c, dev_iter) {
-		struct btree_iter iter;
+		struct bucket_array *buckets;
 		struct bucket_mark m;
-		struct bkey_s_c k;
 
-		for_each_btree_key(&iter, c, BTREE_ID_ALLOC, POS(ca->dev_idx, 0), 0, k) {
-			if (k.k->type != BCH_ALLOC)
-				continue;
+		down_read(&ca->bucket_lock);
+		percpu_down_read(&c->usage_lock);
 
-			bu = k.k->p.offset;
-			m = READ_ONCE(bucket(ca, bu)->mark);
+		buckets = bucket_array(ca);
 
-			if (!is_available_bucket(m) || m.cached_sectors)
+		for (bu = buckets->first_bucket;
+		     bu < buckets->nbuckets; bu++) {
+			m = READ_ONCE(buckets->b[bu].mark);
+
+			if (!m.gen_valid ||
+			    !is_available_bucket(m) ||
+			    m.cached_sectors)
 				continue;
 
-			percpu_down_read(&c->usage_lock);
 			bch2_mark_alloc_bucket(c, ca, bu, true,
 					gc_pos_alloc(c, NULL),
 					BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE|
 					BCH_BUCKET_MARK_GC_LOCK_HELD);
-			percpu_up_read(&c->usage_lock);
 
 			fifo_push(&ca->free_inc, bu);
 
-			if (fifo_full(&ca->free_inc))
+			discard_invalidated_buckets(c, ca);
+
+			if (fifo_full(&ca->free[RESERVE_BTREE]))
 				break;
 		}
-		bch2_btree_iter_unlock(&iter);
+		percpu_up_read(&c->usage_lock);
+		up_read(&ca->bucket_lock);
 	}
 
 	/* did we find enough buckets? */
 	for_each_rw_member(ca, c, dev_iter)
-		if (fifo_used(&ca->free_inc) < ca->free[RESERVE_BTREE].size) {
+		if (!fifo_full(&ca->free[RESERVE_BTREE])) {
 			percpu_ref_put(&ca->io_ref);
 			goto not_enough;
 		}
 
 	return 0;
 not_enough:
-	pr_debug("did not find enough empty buckets; issuing discards");
-
-	/* clear out free_inc, we'll be using it again below: */
-	for_each_rw_member(ca, c, dev_iter)
-		discard_invalidated_buckets(c, ca);
-
-	pr_debug("scanning for reclaimable buckets");
+	pr_debug("not enough empty buckets; scanning for reclaimable buckets");
 
 	for_each_rw_member(ca, c, dev_iter) {
 		find_reclaimable_buckets(c, ca);
...
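Where the old startup path walked the alloc btree to find already-invalidated buckets, the hunk above scans the in-memory bucket array directly (under ca->bucket_lock and the usage lock) and pushes candidates onto free_inc until the btree reserve FIFO is full. A rough sketch of that scan-and-fill shape, using a toy fixed-size FIFO in place of bcachefs's alloc_fifo (all names here are illustrative, not the kernel's):

#include <stdbool.h>
#include <stddef.h>

#define RESERVE_SIZE 16

struct toy_fifo {
        size_t buf[RESERVE_SIZE];
        size_t used;
};

static bool toy_fifo_full(const struct toy_fifo *f)
{
        return f->used == RESERVE_SIZE;
}

static void toy_fifo_push(struct toy_fifo *f, size_t bucket)
{
        f->buf[f->used++] = bucket;
}

/* walk the per-bucket state array, queueing buckets that are already free */
static void scan_available(const bool *bucket_is_free, size_t nbuckets,
                           struct toy_fifo *reserve)
{
        for (size_t bu = 0; bu < nbuckets; bu++) {
                if (!bucket_is_free[bu])
                        continue;

                toy_fifo_push(reserve, bu);
                if (toy_fifo_full(reserve))
                        break;
        }
}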
@@ -17,7 +17,7 @@ void bch2_alloc_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
 }
 
 int bch2_alloc_read(struct bch_fs *, struct list_head *);
-int bch2_alloc_replay_key(struct bch_fs *, struct bpos);
+int bch2_alloc_replay_key(struct bch_fs *, struct bkey_i *);
 
 static inline void bch2_wake_allocator(struct bch_dev *ca)
 {
...
@@ -394,6 +394,7 @@ struct bch_dev {
 	 */
 	struct bucket_array __rcu *buckets;
 	unsigned long		*buckets_dirty;
+	unsigned long		*buckets_written;
 	/* most out of date gen in the btree */
 	u8			*oldest_gens;
 	struct rw_semaphore	bucket_lock;
...
@@ -1096,6 +1096,7 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
 {
 	struct bucket_array *buckets = NULL, *old_buckets = NULL;
 	unsigned long *buckets_dirty = NULL;
+	unsigned long *buckets_written = NULL;
 	u8 *oldest_gens = NULL;
 	alloc_fifo	free[RESERVE_NR];
 	alloc_fifo	free_inc;
@@ -1127,6 +1128,9 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
 	    !(buckets_dirty	= kvpmalloc(BITS_TO_LONGS(nbuckets) *
 					    sizeof(unsigned long),
 					    GFP_KERNEL|__GFP_ZERO)) ||
+	    !(buckets_written	= kvpmalloc(BITS_TO_LONGS(nbuckets) *
+					    sizeof(unsigned long),
+					    GFP_KERNEL|__GFP_ZERO)) ||
 	    !init_fifo(&free[RESERVE_BTREE], btree_reserve, GFP_KERNEL) ||
 	    !init_fifo(&free[RESERVE_MOVINGGC],
 		       copygc_reserve, GFP_KERNEL) ||
@@ -1161,6 +1165,9 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
 		memcpy(buckets_dirty,
 		       ca->buckets_dirty,
 		       BITS_TO_LONGS(n) * sizeof(unsigned long));
+		memcpy(buckets_written,
+		       ca->buckets_written,
+		       BITS_TO_LONGS(n) * sizeof(unsigned long));
 	}
 
 	rcu_assign_pointer(ca->buckets, buckets);
@@ -1168,6 +1175,7 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
 	swap(ca->oldest_gens, oldest_gens);
 	swap(ca->buckets_dirty, buckets_dirty);
+	swap(ca->buckets_written, buckets_written);
 
 	if (resize)
 		percpu_up_write(&c->usage_lock);
@@ -1207,6 +1215,8 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
 		free_fifo(&free[i]);
 	kvpfree(buckets_dirty,
 		BITS_TO_LONGS(nbuckets) * sizeof(unsigned long));
+	kvpfree(buckets_written,
+		BITS_TO_LONGS(nbuckets) * sizeof(unsigned long));
 	kvpfree(oldest_gens,
 		nbuckets * sizeof(u8));
 	if (buckets)
@@ -1224,6 +1234,8 @@ void bch2_dev_buckets_free(struct bch_dev *ca)
 	free_fifo(&ca->free_inc);
 	for (i = 0; i < RESERVE_NR; i++)
 		free_fifo(&ca->free[i]);
+	kvpfree(ca->buckets_written,
+		BITS_TO_LONGS(ca->mi.nbuckets) * sizeof(unsigned long));
 	kvpfree(ca->buckets_dirty,
 		BITS_TO_LONGS(ca->mi.nbuckets) * sizeof(unsigned long));
 	kvpfree(ca->oldest_gens, ca->mi.nbuckets * sizeof(u8));
...
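The buckets.c hunks above extend the existing resize machinery to the new bitmap: allocate a zeroed buffer at the new size, copy the old bits up to the smaller of the two sizes, swap the pointers, then free whichever buffer is left in the local variable. A minimal userspace sketch of that allocate/copy/swap/free pattern, with calloc standing in for kvpmalloc(..., GFP_KERNEL|__GFP_ZERO) and no locking shown:

#include <stdlib.h>
#include <string.h>

/*
 * Resize a bitmap stored as an array of unsigned long, preserving old bits.
 * Returns 0 on success, -1 on allocation failure (the old map is untouched).
 */
static int resize_bitmap(unsigned long **map, size_t old_longs, size_t new_longs)
{
        unsigned long *new_map = calloc(new_longs, sizeof(unsigned long));
        size_t copy = old_longs < new_longs ? old_longs : new_longs;

        if (!new_map)
                return -1;

        if (*map && copy)
                memcpy(new_map, *map, copy * sizeof(unsigned long));

        /* mirrors the swap()-then-kvpfree() pairing in the diff above */
        free(*map);
        *map = new_map;
        return 0;
}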
@@ -780,7 +780,6 @@ int bch2_journal_replay(struct bch_fs *c, struct list_head *list)
 	int ret = 0;
 
 	list_for_each_entry_safe(i, n, list, list) {
 		j->replay_journal_seq = le64_to_cpu(i->j.seq);
-
 		for_each_jset_key(k, _n, entry, &i->j) {
@@ -790,7 +789,7 @@ int bch2_journal_replay(struct bch_fs *c, struct list_head *list)
 				 * allocation code handles replay for
 				 * BTREE_ID_ALLOC keys:
 				 */
-				ret = bch2_alloc_replay_key(c, k->k.p);
+				ret = bch2_alloc_replay_key(c, k);
 			} else {
 				/*
 				 * We might cause compressed extents to be
@@ -801,9 +800,9 @@ int bch2_journal_replay(struct bch_fs *c, struct list_head *list)
 					bch2_disk_reservation_init(c, 0);
 
-				ret = bch2_btree_insert(c, entry->btree_id, k,
-							&disk_res, NULL,
-							BTREE_INSERT_NOFAIL|
-							BTREE_INSERT_JOURNAL_REPLAY);
+				ret = bch2_btree_insert(c, entry->btree_id, k,
+						&disk_res, NULL,
+						BTREE_INSERT_NOFAIL|
+						BTREE_INSERT_JOURNAL_REPLAY);
 			}
 
 			if (ret) {
...