Commit 4d8100da authored by Kent Overstreet, committed by Kent Overstreet

bcachefs: Allocate fs_usage in do_btree_insert_at()

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent 9623ab27
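
This change reworks how the struct bch_fs_usage scratch buffer used during marking is allocated. Previously c->usage_scratch was a percpu buffer that bch2_mark_update() grabbed with preemption disabled; now it is a single preallocated buffer guarded by usage_scratch_lock (the "single element mempool" named in the new struct bch_fs comment), obtained once per transaction in do_btree_insert_at(). That lets the transaction take mark_lock up front, check every key's replicas entries with the new _locked helpers before taking btree write locks (so BTREE_INSERT_NEED_MARK_REPLICAS is returned early, while it is still cheap to unwind), and fold all accumulated usage deltas into c->usage with one bch2_trans_fs_usage_apply() call.

bch2_fs_usage_scratch_get() is written so it can never fail: try a cheap kzalloc(GFP_NOWAIT), then trylock the preallocated buffer, then a kzalloc(GFP_NOFS) that may block to reclaim, and finally sleep on the mutex. A minimal userspace sketch of the same pattern (pthreads standing in for struct mutex, calloc() for kzalloc(); the scratch_* names are illustrative, not from the patch):

	#include <pthread.h>
	#include <stdlib.h>
	#include <string.h>

	struct scratch_pool {
		pthread_mutex_t	lock;		/* guards the one preallocated buffer */
		void		*prealloc;	/* allocated once at init time */
		size_t		bytes;
	};

	static int scratch_pool_init(struct scratch_pool *p, size_t bytes)
	{
		pthread_mutex_init(&p->lock, NULL);
		p->bytes = bytes;
		p->prealloc = malloc(bytes);
		return p->prealloc ? 0 : -1;
	}

	static void *scratch_get(struct scratch_pool *p)
	{
		void *ret;

		/* fast path: fresh allocation (kzalloc(GFP_NOWAIT) analogue) */
		ret = calloc(1, p->bytes);
		if (ret)
			return ret;

		/* allocation failed: try the preallocated buffer without blocking */
		if (pthread_mutex_trylock(&p->lock) == 0)
			goto out_pool;

		/* buffer busy: retry the allocation, this time allowed to block (GFP_NOFS) */
		ret = calloc(1, p->bytes);
		if (ret)
			return ret;

		/* last resort: sleep until the preallocated buffer is freed */
		pthread_mutex_lock(&p->lock);
	out_pool:
		ret = p->prealloc;
		memset(ret, 0, p->bytes);
		return ret;
	}

	static void scratch_put(struct scratch_pool *p, void *buf)
	{
		if (buf == p->prealloc)
			pthread_mutex_unlock(&p->lock);	/* pass it to the next waiter */
		else
			free(buf);
	}

Because the preallocated buffer exists from bch2_fs_alloc() time, the get side always returns a zeroed buffer, which is why none of the call sites below have an error path for it; the cost is that only one failed-allocation caller at a time can hold the pool buffer, acceptable for a scratch buffer held only across marking.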
@@ -635,7 +635,10 @@ struct bch_fs {
 	struct percpu_rw_semaphore	mark_lock;
 	struct bch_fs_usage __percpu	*usage[2];
-	struct bch_fs_usage __percpu	*usage_scratch;
+	/* single element mempool: */
+	struct mutex			usage_scratch_lock;
+	struct bch_fs_usage		*usage_scratch;
 	/*
 	 * When we invalidate buckets, we use both the priority and the amount
@@ -1076,8 +1076,7 @@ static void bch2_btree_set_root_inmem(struct btree_update *as, struct btree *b)
 	mutex_lock(&c->btree_interior_update_lock);
 	percpu_down_read(&c->mark_lock);
-	preempt_disable();
-	fs_usage = bch2_fs_usage_get_scratch(c);
+	fs_usage = bch2_fs_usage_scratch_get(c);
 	bch2_mark_key_locked(c, bkey_i_to_s_c(&b->key),
 		      true, 0,
@@ -1090,7 +1089,7 @@ static void bch2_btree_set_root_inmem(struct btree_update *as, struct btree *b)
 		    fs_usage);
 	bch2_fs_usage_apply(c, fs_usage, &as->reserve->disk_res);
-	preempt_enable();
+	bch2_fs_usage_scratch_put(c, fs_usage);
 	percpu_up_read(&c->mark_lock);
 	mutex_unlock(&c->btree_interior_update_lock);
 }
@@ -1171,8 +1170,7 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, struct btree *b
 	mutex_lock(&c->btree_interior_update_lock);
 	percpu_down_read(&c->mark_lock);
-	preempt_disable();
-	fs_usage = bch2_fs_usage_get_scratch(c);
+	fs_usage = bch2_fs_usage_scratch_get(c);
 	bch2_mark_key_locked(c, bkey_i_to_s_c(insert),
 		      true, 0,
@@ -1193,7 +1191,7 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, struct btree *b
 	bch2_fs_usage_apply(c, fs_usage, &as->reserve->disk_res);
-	preempt_enable();
+	bch2_fs_usage_scratch_put(c, fs_usage);
 	percpu_up_read(&c->mark_lock);
 	mutex_unlock(&c->btree_interior_update_lock);
@@ -1987,7 +1985,7 @@ static void __bch2_btree_node_update_key(struct bch_fs *c,
 	mutex_lock(&c->btree_interior_update_lock);
 	percpu_down_read(&c->mark_lock);
-	fs_usage = bch2_fs_usage_get_scratch(c);
+	fs_usage = bch2_fs_usage_scratch_get(c);
 	bch2_mark_key_locked(c, bkey_i_to_s_c(&new_key->k_i),
 		      true, 0,
@@ -1998,6 +1996,7 @@ static void __bch2_btree_node_update_key(struct bch_fs *c,
 		    fs_usage);
 	bch2_fs_usage_apply(c, fs_usage, &as->reserve->disk_res);
+	bch2_fs_usage_scratch_put(c, fs_usage);
 	percpu_up_read(&c->mark_lock);
 	mutex_unlock(&c->btree_interior_update_lock);
@@ -269,8 +269,6 @@ static void btree_insert_key_leaf(struct btree_trans *trans,
 	int old_live_u64s = b->nr.live_u64s;
 	int live_u64s_added, u64s_added;
-	bch2_mark_update(trans, insert);
 	if (!btree_node_is_extents(b))
 		bch2_insert_fixup_key(trans, insert);
 	else
@@ -499,11 +497,6 @@ btree_key_can_insert(struct btree_trans *trans,
 	if (unlikely(btree_node_fake(b)))
 		return BTREE_INSERT_BTREE_NODE_FULL;
-	if (!bch2_bkey_replicas_marked(c,
-			bkey_i_to_s_c(insert->k),
-			true))
-		return BTREE_INSERT_NEED_MARK_REPLICAS;
 	ret = !btree_node_is_extents(b)
 		? BTREE_INSERT_OK
 		: bch2_extent_can_insert(trans, insert, u64s);
@@ -555,6 +548,7 @@ static inline int do_btree_insert_at(struct btree_trans *trans,
 			     struct btree_insert_entry **stopped_at)
 {
 	struct bch_fs *c = trans->c;
+	struct bch_fs_usage *fs_usage = NULL;
 	struct btree_insert_entry *i;
 	struct btree_iter *linked;
 	int ret;
@@ -562,12 +556,29 @@ static inline int do_btree_insert_at(struct btree_trans *trans,
 	trans_for_each_update_iter(trans, i)
 		BUG_ON(i->iter->uptodate >= BTREE_ITER_NEED_RELOCK);
+	trans_for_each_update_iter(trans, i) {
+		if (i->deferred ||
+		    !btree_node_type_needs_gc(i->iter->btree_id))
+			continue;
+
+		if (!fs_usage) {
+			percpu_down_read(&c->mark_lock);
+			fs_usage = bch2_fs_usage_scratch_get(c);
+		}
+
+		if (!bch2_bkey_replicas_marked_locked(c,
+			bkey_i_to_s_c(i->k), true)) {
+			ret = BTREE_INSERT_NEED_MARK_REPLICAS;
+			goto out;
+		}
+	}
+
 	btree_trans_lock_write(c, trans);
 	if (race_fault()) {
 		ret = -EINTR;
 		trans_restart(" (race)");
-		goto out;
+		goto out_unlock;
 	}
 	/*
@@ -577,7 +588,7 @@ static inline int do_btree_insert_at(struct btree_trans *trans,
 	 */
 	ret = btree_trans_check_can_insert(trans, stopped_at);
 	if (ret)
-		goto out;
+		goto out_unlock;
 	/*
 	 * Don't get journal reservation until after we know insert will
@@ -585,7 +596,7 @@ static inline int do_btree_insert_at(struct btree_trans *trans,
 	 */
 	ret = bch2_trans_journal_res_get(trans, JOURNAL_RES_GET_NONBLOCK);
 	if (ret)
-		goto out;
+		goto out_unlock;
 	if (!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY)) {
 		if (journal_seq_verify(c))
@@ -610,14 +621,25 @@ static inline int do_btree_insert_at(struct btree_trans *trans,
 		}
 	}
+	trans_for_each_update_iter(trans, i)
+		bch2_mark_update(trans, i, fs_usage);
+
+	if (fs_usage)
+		bch2_trans_fs_usage_apply(trans, fs_usage);
+
 	trans_for_each_update(trans, i)
 		do_btree_insert_one(trans, i);
-out:
+out_unlock:
 	BUG_ON(ret &&
 	       (trans->flags & BTREE_INSERT_JOURNAL_RESERVED) &&
 	       trans->journal_res.ref);
 	btree_trans_unlock_write(trans);
+out:
+	if (fs_usage) {
+		bch2_fs_usage_scratch_put(c, fs_usage);
+		percpu_up_read(&c->mark_lock);
+	}
+
 	bch2_journal_res_put(&c->journal, &trans->journal_res);
 	return ret;
@@ -144,6 +144,37 @@ void bch2_fs_usage_initialize(struct bch_fs *c)
 	percpu_up_write(&c->mark_lock);
 }
+void bch2_fs_usage_scratch_put(struct bch_fs *c, struct bch_fs_usage *fs_usage)
+{
+	if (fs_usage == c->usage_scratch)
+		mutex_unlock(&c->usage_scratch_lock);
+	else
+		kfree(fs_usage);
+}
+
+struct bch_fs_usage *bch2_fs_usage_scratch_get(struct bch_fs *c)
+{
+	struct bch_fs_usage *ret;
+	unsigned bytes = fs_usage_u64s(c) * sizeof(u64);
+
+	ret = kzalloc(bytes, GFP_NOWAIT);
+	if (ret)
+		return ret;
+
+	if (mutex_trylock(&c->usage_scratch_lock))
+		goto out_pool;
+
+	ret = kzalloc(bytes, GFP_NOFS);
+	if (ret)
+		return ret;
+
+	mutex_lock(&c->usage_scratch_lock);
+out_pool:
+	ret = c->usage_scratch;
+	memset(ret, 0, bytes);
+	return ret;
+}
+
 struct bch_dev_usage bch2_dev_usage_read(struct bch_fs *c, struct bch_dev *ca)
 {
 	struct bch_dev_usage ret;
@@ -906,31 +937,39 @@ static int __bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,
 			   unsigned journal_seq, unsigned flags,
 			   bool gc)
 {
+	int ret = 0;
+
+	preempt_disable();
+
 	if (!fs_usage || gc)
 		fs_usage = this_cpu_ptr(c->usage[gc]);
 	switch (k.k->type) {
 	case KEY_TYPE_alloc:
-		return bch2_mark_alloc(c, k, inserting,
+		ret = bch2_mark_alloc(c, k, inserting,
 				fs_usage, journal_seq, flags, gc);
+		break;
 	case KEY_TYPE_btree_ptr:
-		return bch2_mark_extent(c, k, inserting
+		ret = bch2_mark_extent(c, k, inserting
 				       ?  c->opts.btree_node_size
 				       : -c->opts.btree_node_size,
 				       BCH_DATA_BTREE,
 				       fs_usage, journal_seq, flags, gc);
+		break;
 	case KEY_TYPE_extent:
-		return bch2_mark_extent(c, k, sectors, BCH_DATA_USER,
+		ret = bch2_mark_extent(c, k, sectors, BCH_DATA_USER,
 				       fs_usage, journal_seq, flags, gc);
+		break;
 	case KEY_TYPE_stripe:
-		return bch2_mark_stripe(c, k, inserting,
+		ret = bch2_mark_stripe(c, k, inserting,
 				       fs_usage, journal_seq, flags, gc);
+		break;
 	case KEY_TYPE_inode:
 		if (inserting)
 			fs_usage->nr_inodes++;
 		else
 			fs_usage->nr_inodes--;
-		return 0;
+		break;
 	case KEY_TYPE_reservation: {
 		unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas;
@@ -940,11 +979,13 @@ static int __bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,
 		fs_usage->reserved				+= sectors;
 		fs_usage->persistent_reserved[replicas - 1]	+= sectors;
-		return 0;
+		break;
 	}
 	default:
-		return 0;
+		break;
 	}
+	preempt_enable();
+	return ret;
 }
 int bch2_mark_key_locked(struct bch_fs *c,
@@ -976,25 +1017,19 @@ int bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,
 }
 void bch2_mark_update(struct btree_trans *trans,
-		      struct btree_insert_entry *insert)
+		      struct btree_insert_entry *insert,
+		      struct bch_fs_usage *fs_usage)
 {
 	struct bch_fs *c = trans->c;
 	struct btree_iter *iter = insert->iter;
 	struct btree *b = iter->l[0].b;
 	struct btree_node_iter node_iter = iter->l[0].iter;
-	struct bch_fs_usage *fs_usage;
 	struct gc_pos pos = gc_pos_btree_node(b);
 	struct bkey_packed *_k;
-	u64 disk_res_sectors = trans->disk_res ? trans->disk_res->sectors : 0;
-	static int warned_disk_usage = 0;
 	if (!btree_node_type_needs_gc(iter->btree_id))
 		return;
-	percpu_down_read(&c->mark_lock);
-	preempt_disable();
-	fs_usage = bch2_fs_usage_get_scratch(c);
 	if (!(trans->flags & BTREE_INSERT_NOMARK))
 		bch2_mark_key_locked(c, bkey_i_to_s_c(insert->k), true,
 			bpos_min(insert->k->k.p, b->key.k.p).offset -
@@ -1047,16 +1082,32 @@ void bch2_mark_update(struct btree_trans *trans,
 			bch2_btree_node_iter_advance(&node_iter, b);
 		}
 	}
-	if (bch2_fs_usage_apply(c, fs_usage, trans->disk_res) &&
-	    !warned_disk_usage &&
-	    !xchg(&warned_disk_usage, 1)) {
-		char buf[200];
+}
+
+void bch2_trans_fs_usage_apply(struct btree_trans *trans,
+			       struct bch_fs_usage *fs_usage)
+{
+	struct bch_fs *c = trans->c;
+	struct btree_insert_entry *i;
+	static int warned_disk_usage = 0;
+	u64 disk_res_sectors = trans->disk_res ? trans->disk_res->sectors : 0;
+	char buf[200];
+
+	if (!bch2_fs_usage_apply(c, fs_usage, trans->disk_res) ||
+	    warned_disk_usage ||
+	    xchg(&warned_disk_usage, 1))
+		return;
-		pr_err("disk usage increased more than %llu sectors reserved", disk_res_sectors);
+	pr_err("disk usage increased more than %llu sectors reserved", disk_res_sectors);
+	trans_for_each_update_iter(trans, i) {
+		struct btree_iter *iter = i->iter;
+		struct btree *b = iter->l[0].b;
+		struct btree_node_iter node_iter = iter->l[0].iter;
+		struct bkey_packed *_k;
+
 		pr_err("while inserting");
-		bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(insert->k));
+		bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(i->k));
 		pr_err("%s", buf);
 		pr_err("overlapping with");
@@ -1069,8 +1120,8 @@ void bch2_mark_update(struct btree_trans *trans,
 			k = bkey_disassemble(b, _k, &unpacked);
 			if (btree_node_is_extents(b)
-			    ? bkey_cmp(insert->k->k.p, bkey_start_pos(k.k)) <= 0
-			    : bkey_cmp(insert->k->k.p, k.k->p))
+			    ? bkey_cmp(i->k->k.p, bkey_start_pos(k.k)) <= 0
+			    : bkey_cmp(i->k->k.p, k.k->p))
 				break;
 			bch2_bkey_val_to_text(&PBUF(buf), c, k);
@@ -1079,9 +1130,6 @@ void bch2_mark_update(struct btree_trans *trans,
 			bch2_btree_node_iter_advance(&node_iter, b);
 		}
 	}
-
-	preempt_enable();
-	percpu_up_read(&c->mark_lock);
 }

 /* Disk reservations: */
@@ -219,13 +219,8 @@ static inline unsigned fs_usage_u64s(struct bch_fs *c)
 		READ_ONCE(c->replicas.nr);
 }
-static inline struct bch_fs_usage *bch2_fs_usage_get_scratch(struct bch_fs *c)
-{
-	struct bch_fs_usage *ret = this_cpu_ptr(c->usage_scratch);
-
-	memset(ret, 0, fs_usage_u64s(c) * sizeof(u64));
-	return ret;
-}
+void bch2_fs_usage_scratch_put(struct bch_fs *, struct bch_fs_usage *);
+struct bch_fs_usage *bch2_fs_usage_scratch_get(struct bch_fs *);
 struct bch_fs_usage *bch2_fs_usage_read(struct bch_fs *);
@@ -256,10 +251,13 @@ int bch2_mark_key_locked(struct bch_fs *, struct bkey_s_c,
 int bch2_mark_key(struct bch_fs *, struct bkey_s_c,
 		  bool, s64, struct gc_pos,
 		  struct bch_fs_usage *, u64, unsigned);
-void bch2_mark_update(struct btree_trans *, struct btree_insert_entry *);
 int bch2_fs_usage_apply(struct bch_fs *, struct bch_fs_usage *,
 			struct disk_reservation *);
+void bch2_mark_update(struct btree_trans *, struct btree_insert_entry *,
+		      struct bch_fs_usage *);
+void bch2_trans_fs_usage_apply(struct btree_trans *, struct bch_fs_usage *);

 /* disk reservations: */

 void __bch2_disk_reservation_put(struct bch_fs *, struct disk_reservation *);
@@ -1190,11 +1190,12 @@ void bch2_insert_fixup_extent(struct btree_trans *trans,
 		if (s.deleting)
 			tmp.k.k.type = KEY_TYPE_discard;
+#if 0
+		/* disabled due to lock recursion - mark_lock: */
 		if (debug_check_bkeys(c))
 			bch2_bkey_debugcheck(c, iter->l[0].b,
 					     bkey_i_to_s_c(&tmp.k));
+#endif
 		EBUG_ON(bkey_deleted(&tmp.k.k) || !tmp.k.k.size);
 		extent_bset_insert(c, iter, &tmp.k);
@@ -207,22 +207,29 @@ static bool __replicas_has_entry(struct bch_replicas_cpu *r,
 	return __replicas_entry_idx(r, search) >= 0;
 }
-bool bch2_replicas_marked(struct bch_fs *c,
+static bool bch2_replicas_marked_locked(struct bch_fs *c,
 			  struct bch_replicas_entry *search,
 			  bool check_gc_replicas)
 {
-	bool marked;
-
 	if (!search->nr_devs)
 		return true;
 	verify_replicas_entry_sorted(search);
-	percpu_down_read(&c->mark_lock);
-	marked = __replicas_has_entry(&c->replicas, search) &&
+	return __replicas_has_entry(&c->replicas, search) &&
 		(!check_gc_replicas ||
 		 likely((!c->replicas_gc.entries)) ||
 		 __replicas_has_entry(&c->replicas_gc, search));
+}
+
+bool bch2_replicas_marked(struct bch_fs *c,
+			  struct bch_replicas_entry *search,
+			  bool check_gc_replicas)
+{
+	bool marked;
+
+	percpu_down_read(&c->mark_lock);
+	marked = bch2_replicas_marked_locked(c, search, check_gc_replicas);
 	percpu_up_read(&c->mark_lock);

 	return marked;
@@ -263,7 +270,7 @@ static int replicas_table_update(struct bch_fs *c,
 			       struct bch_replicas_cpu *new_r)
 {
 	struct bch_fs_usage __percpu *new_usage[2] = { NULL, NULL };
-	struct bch_fs_usage __percpu *new_scratch = NULL;
+	struct bch_fs_usage *new_scratch = NULL;
 	unsigned bytes = sizeof(struct bch_fs_usage) +
 		sizeof(u64) * new_r->nr;
 	int ret = -ENOMEM;
@@ -273,8 +280,7 @@ static int replicas_table_update(struct bch_fs *c,
 	    (c->usage[1] &&
 	     !(new_usage[1] = __alloc_percpu_gfp(bytes, sizeof(u64),
 						 GFP_NOIO))) ||
-	    !(new_scratch = __alloc_percpu_gfp(bytes, sizeof(u64),
-					       GFP_NOIO)))
+	    !(new_scratch = kmalloc(bytes, GFP_NOIO)))
 		goto err;
@@ -290,7 +296,7 @@ static int replicas_table_update(struct bch_fs *c,
 	swap(c->replicas, *new_r);
 	ret = 0;
 err:
-	free_percpu(new_scratch);
+	kfree(new_scratch);
 	free_percpu(new_usage[1]);
 	free_percpu(new_usage[0]);
 	return ret;
@@ -390,9 +396,9 @@ int bch2_mark_replicas(struct bch_fs *c,
 		: bch2_mark_replicas_slowpath(c, r);
 }
-bool bch2_bkey_replicas_marked(struct bch_fs *c,
-			       struct bkey_s_c k,
-			       bool check_gc_replicas)
+bool bch2_bkey_replicas_marked_locked(struct bch_fs *c,
+				      struct bkey_s_c k,
+				      bool check_gc_replicas)
 {
 	struct bch_replicas_padded search;
 	struct bch_devs_list cached = bch2_bkey_cached_devs(k);
@@ -401,13 +407,27 @@ bool bch2_bkey_replicas_marked(struct bch_fs *c,
 	for (i = 0; i < cached.nr; i++) {
 		bch2_replicas_entry_cached(&search.e, cached.devs[i]);
-		if (!bch2_replicas_marked(c, &search.e, check_gc_replicas))
+		if (!bch2_replicas_marked_locked(c, &search.e,
+						 check_gc_replicas))
 			return false;
 	}
 	bkey_to_replicas(&search.e, k);
-	return bch2_replicas_marked(c, &search.e, check_gc_replicas);
+	return bch2_replicas_marked_locked(c, &search.e, check_gc_replicas);
+}
+
+bool bch2_bkey_replicas_marked(struct bch_fs *c,
+			       struct bkey_s_c k,
+			       bool check_gc_replicas)
+{
+	bool marked;
+
+	percpu_down_read(&c->mark_lock);
+	marked = bch2_bkey_replicas_marked_locked(c, k, check_gc_replicas);
+	percpu_up_read(&c->mark_lock);
+
+	return marked;
 }

 int bch2_mark_bkey_replicas(struct bch_fs *c, struct bkey_s_c k)
@@ -26,6 +26,8 @@ bool bch2_replicas_marked(struct bch_fs *,
 int bch2_mark_replicas(struct bch_fs *,
 		       struct bch_replicas_entry *);
+bool bch2_bkey_replicas_marked_locked(struct bch_fs *,
+				      struct bkey_s_c, bool);
 bool bch2_bkey_replicas_marked(struct bch_fs *,
 			       struct bkey_s_c, bool);
 int bch2_mark_bkey_replicas(struct bch_fs *, struct bkey_s_c);
@@ -404,7 +404,7 @@ static void bch2_fs_free(struct bch_fs *c)
 	bch2_io_clock_exit(&c->io_clock[READ]);
 	bch2_fs_compress_exit(c);
 	percpu_free_rwsem(&c->mark_lock);
-	free_percpu(c->usage_scratch);
+	kfree(c->usage_scratch);
 	free_percpu(c->usage[0]);
 	free_percpu(c->pcpu);
 	mempool_exit(&c->btree_iters_pool);
@@ -572,6 +572,8 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
 	mutex_init(&c->btree_reserve_cache_lock);
 	mutex_init(&c->btree_interior_update_lock);
+	mutex_init(&c->usage_scratch_lock);
+
 	mutex_init(&c->bio_bounce_pages_lock);
 	bio_list_init(&c->btree_write_error_list);
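
All of the call sites in this patch follow the same bracketing around the scratch buffer; schematically (a reconstruction from the bch2_btree_set_root_inmem() and do_btree_insert_at() hunks above, not a verbatim excerpt):

	percpu_down_read(&c->mark_lock);
	fs_usage = bch2_fs_usage_scratch_get(c);	/* always succeeds */

	/* marking accumulates usage deltas into the zeroed scratch buffer */
	bch2_mark_key_locked(c, k, ...);

	bch2_fs_usage_apply(c, fs_usage, disk_res);	/* fold deltas into c->usage */
	bch2_fs_usage_scratch_put(c, fs_usage);
	percpu_up_read(&c->mark_lock);

do_btree_insert_at() stretches this bracket across the whole transaction: the get happens before btree_trans_lock_write(), so the GFP_NOFS fallback is free to sleep, and the put happens on the common exit path after the btree write locks are dropped.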