Commit 61274e9d, authored and committed by Kent Overstreet

bcachefs: Allocator startup improvements

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent a9ec3454
......@@ -237,6 +237,7 @@ static int __bch2_alloc_write_key(struct bch_fs *c, struct bch_dev *ca,
__BKEY_PADDED(k, DIV_ROUND_UP(sizeof(struct bch_alloc), 8)) alloc_key;
struct bucket *g;
struct bkey_i_alloc *a;
int ret;
u8 *d;
percpu_down_read(&c->usage_lock);
......@@ -260,32 +261,50 @@ static int __bch2_alloc_write_key(struct bch_fs *c, struct bch_dev *ca,
bch2_btree_iter_set_pos(iter, a->k.p);
return bch2_btree_insert_at(c, NULL, journal_seq,
BTREE_INSERT_NOFAIL|
BTREE_INSERT_USE_RESERVE|
BTREE_INSERT_USE_ALLOC_RESERVE|
flags,
BTREE_INSERT_ENTRY(iter, &a->k_i));
ret = bch2_btree_insert_at(c, NULL, journal_seq,
BTREE_INSERT_NOFAIL|
BTREE_INSERT_USE_RESERVE|
BTREE_INSERT_USE_ALLOC_RESERVE|
flags,
BTREE_INSERT_ENTRY(iter, &a->k_i));
if (!ret && ca->buckets_written)
set_bit(b, ca->buckets_written);
return ret;
}
int bch2_alloc_replay_key(struct bch_fs *c, struct bpos pos)
int bch2_alloc_replay_key(struct bch_fs *c, struct bkey_i *k)
{
struct bch_dev *ca;
struct btree_iter iter;
int ret;
if (pos.inode >= c->sb.nr_devices || !c->devs[pos.inode])
if (k->k.p.inode >= c->sb.nr_devices ||
!c->devs[k->k.p.inode])
return 0;
ca = bch_dev_bkey_exists(c, pos.inode);
ca = bch_dev_bkey_exists(c, k->k.p.inode);
if (pos.offset >= ca->mi.nbuckets)
if (k->k.p.offset >= ca->mi.nbuckets)
return 0;
bch2_btree_iter_init(&iter, c, BTREE_ID_ALLOC, POS_MIN,
BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
bch2_btree_iter_init(&iter, c, BTREE_ID_ALLOC, k->k.p,
BTREE_ITER_INTENT);
ret = __bch2_alloc_write_key(c, ca, pos.offset, &iter, NULL, 0);
ret = bch2_btree_iter_traverse(&iter);
if (ret)
goto err;
/* check buckets_written with btree node locked: */
ret = test_bit(k->k.p.offset, ca->buckets_written)
? 0
: bch2_btree_insert_at(c, NULL, NULL,
BTREE_INSERT_NOFAIL|
BTREE_INSERT_JOURNAL_REPLAY,
BTREE_INSERT_ENTRY(&iter, k));
err:
bch2_btree_iter_unlock(&iter);
return ret;
}
......@@ -1284,51 +1303,49 @@ static int __bch2_fs_allocator_start(struct bch_fs *c)
/* Scan for buckets that are already invalidated: */
for_each_rw_member(ca, c, dev_iter) {
struct btree_iter iter;
struct bucket_array *buckets;
struct bucket_mark m;
struct bkey_s_c k;
for_each_btree_key(&iter, c, BTREE_ID_ALLOC, POS(ca->dev_idx, 0), 0, k) {
if (k.k->type != BCH_ALLOC)
continue;
down_read(&ca->bucket_lock);
percpu_down_read(&c->usage_lock);
buckets = bucket_array(ca);
bu = k.k->p.offset;
m = READ_ONCE(bucket(ca, bu)->mark);
for (bu = buckets->first_bucket;
bu < buckets->nbuckets; bu++) {
m = READ_ONCE(buckets->b[bu].mark);
if (!is_available_bucket(m) || m.cached_sectors)
if (!m.gen_valid ||
!is_available_bucket(m) ||
m.cached_sectors)
continue;
percpu_down_read(&c->usage_lock);
bch2_mark_alloc_bucket(c, ca, bu, true,
gc_pos_alloc(c, NULL),
BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE|
BCH_BUCKET_MARK_GC_LOCK_HELD);
percpu_up_read(&c->usage_lock);
fifo_push(&ca->free_inc, bu);
if (fifo_full(&ca->free_inc))
discard_invalidated_buckets(c, ca);
if (fifo_full(&ca->free[RESERVE_BTREE]))
break;
}
bch2_btree_iter_unlock(&iter);
percpu_up_read(&c->usage_lock);
up_read(&ca->bucket_lock);
}
/* did we find enough buckets? */
for_each_rw_member(ca, c, dev_iter)
if (fifo_used(&ca->free_inc) < ca->free[RESERVE_BTREE].size) {
if (!fifo_full(&ca->free[RESERVE_BTREE])) {
percpu_ref_put(&ca->io_ref);
goto not_enough;
}
return 0;
not_enough:
pr_debug("did not find enough empty buckets; issuing discards");
/* clear out free_inc, we'll be using it again below: */
for_each_rw_member(ca, c, dev_iter)
discard_invalidated_buckets(c, ca);
pr_debug("scanning for reclaimable buckets");
pr_debug("not enough empty buckets; scanning for reclaimable buckets");
for_each_rw_member(ca, c, dev_iter) {
find_reclaimable_buckets(c, ca);
......
......@@ -17,7 +17,7 @@ void bch2_alloc_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
}
int bch2_alloc_read(struct bch_fs *, struct list_head *);
int bch2_alloc_replay_key(struct bch_fs *, struct bpos);
int bch2_alloc_replay_key(struct bch_fs *, struct bkey_i *);
static inline void bch2_wake_allocator(struct bch_dev *ca)
{
......
......@@ -394,6 +394,7 @@ struct bch_dev {
*/
struct bucket_array __rcu *buckets;
unsigned long *buckets_dirty;
unsigned long *buckets_written;
/* most out of date gen in the btree */
u8 *oldest_gens;
struct rw_semaphore bucket_lock;
......
......@@ -1096,6 +1096,7 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
{
struct bucket_array *buckets = NULL, *old_buckets = NULL;
unsigned long *buckets_dirty = NULL;
unsigned long *buckets_written = NULL;
u8 *oldest_gens = NULL;
alloc_fifo free[RESERVE_NR];
alloc_fifo free_inc;
......@@ -1127,6 +1128,9 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
!(buckets_dirty = kvpmalloc(BITS_TO_LONGS(nbuckets) *
sizeof(unsigned long),
GFP_KERNEL|__GFP_ZERO)) ||
!(buckets_written = kvpmalloc(BITS_TO_LONGS(nbuckets) *
sizeof(unsigned long),
GFP_KERNEL|__GFP_ZERO)) ||
!init_fifo(&free[RESERVE_BTREE], btree_reserve, GFP_KERNEL) ||
!init_fifo(&free[RESERVE_MOVINGGC],
copygc_reserve, GFP_KERNEL) ||
......@@ -1161,6 +1165,9 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
memcpy(buckets_dirty,
ca->buckets_dirty,
BITS_TO_LONGS(n) * sizeof(unsigned long));
memcpy(buckets_written,
ca->buckets_written,
BITS_TO_LONGS(n) * sizeof(unsigned long));
}
rcu_assign_pointer(ca->buckets, buckets);
......@@ -1168,6 +1175,7 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
swap(ca->oldest_gens, oldest_gens);
swap(ca->buckets_dirty, buckets_dirty);
swap(ca->buckets_written, buckets_written);
if (resize)
percpu_up_write(&c->usage_lock);
......@@ -1207,6 +1215,8 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
free_fifo(&free[i]);
kvpfree(buckets_dirty,
BITS_TO_LONGS(nbuckets) * sizeof(unsigned long));
kvpfree(buckets_written,
BITS_TO_LONGS(nbuckets) * sizeof(unsigned long));
kvpfree(oldest_gens,
nbuckets * sizeof(u8));
if (buckets)
......@@ -1224,6 +1234,8 @@ void bch2_dev_buckets_free(struct bch_dev *ca)
free_fifo(&ca->free_inc);
for (i = 0; i < RESERVE_NR; i++)
free_fifo(&ca->free[i]);
kvpfree(ca->buckets_written,
BITS_TO_LONGS(ca->mi.nbuckets) * sizeof(unsigned long));
kvpfree(ca->buckets_dirty,
BITS_TO_LONGS(ca->mi.nbuckets) * sizeof(unsigned long));
kvpfree(ca->oldest_gens, ca->mi.nbuckets * sizeof(u8));
......
......@@ -780,7 +780,6 @@ int bch2_journal_replay(struct bch_fs *c, struct list_head *list)
int ret = 0;
list_for_each_entry_safe(i, n, list, list) {
j->replay_journal_seq = le64_to_cpu(i->j.seq);
for_each_jset_key(k, _n, entry, &i->j) {
......@@ -790,7 +789,7 @@ int bch2_journal_replay(struct bch_fs *c, struct list_head *list)
* allocation code handles replay for
* BTREE_ID_ALLOC keys:
*/
ret = bch2_alloc_replay_key(c, k->k.p);
ret = bch2_alloc_replay_key(c, k);
} else {
/*
* We might cause compressed extents to be
......@@ -801,9 +800,9 @@ int bch2_journal_replay(struct bch_fs *c, struct list_head *list)
bch2_disk_reservation_init(c, 0);
ret = bch2_btree_insert(c, entry->btree_id, k,
&disk_res, NULL,
BTREE_INSERT_NOFAIL|
BTREE_INSERT_JOURNAL_REPLAY);
&disk_res, NULL,
BTREE_INSERT_NOFAIL|
BTREE_INSERT_JOURNAL_REPLAY);
}
if (ret) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment