Commit 5147b9ae authored by Kent Overstreet's avatar Kent Overstreet

bcachefs: Btree key cache instrumentation

It turns out the btree key cache shrinker wasn't actually reclaiming
anything, prior to the previous patch. This adds instrumentation so that
if we have further issues we can see what's going on.

Specifically, sysfs internal/btree_key_cache is greatly expanded with
new counters, and the SRCU sequence numbers of the first 10 entries on
each pending freelist, and we also add trigger_btree_key_cache_shrink
for testing without having to prune all the system caches.
Signed-off-by: default avatarKent Overstreet <kent.overstreet@linux.dev>
parent e4f2c4df
......@@ -822,6 +822,8 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
int srcu_idx;
mutex_lock(&bc->lock);
bc->requested_to_free += sc->nr_to_scan;
srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
flags = memalloc_nofs_save();
......@@ -840,6 +842,7 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
atomic_long_dec(&bc->nr_freed);
freed++;
bc->nr_freed_nonpcpu--;
bc->freed++;
}
list_for_each_entry_safe(ck, t, &bc->freed_pcpu, list) {
......@@ -853,6 +856,7 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
atomic_long_dec(&bc->nr_freed);
freed++;
bc->nr_freed_pcpu--;
bc->freed++;
}
rcu_read_lock();
......@@ -871,13 +875,18 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
ck = container_of(pos, struct bkey_cached, hash);
if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
bc->skipped_dirty++;
goto next;
} else if (test_bit(BKEY_CACHED_ACCESSED, &ck->flags)) {
clear_bit(BKEY_CACHED_ACCESSED, &ck->flags);
bc->skipped_accessed++;
goto next;
} else if (bkey_cached_lock_for_evict(ck)) {
bkey_cached_evict(bc, ck);
bkey_cached_free(bc, ck);
bc->moved_to_freelist++;
} else {
bc->skipped_lock_fail++;
}
scanned++;
......@@ -1024,11 +1033,47 @@ int bch2_fs_btree_key_cache_init(struct btree_key_cache *bc)
return 0;
}
void bch2_btree_key_cache_to_text(struct printbuf *out, struct btree_key_cache *c)
void bch2_btree_key_cache_to_text(struct printbuf *out, struct btree_key_cache *bc)
{
prt_printf(out, "nr_freed:\t%lu\n", atomic_long_read(&c->nr_freed));
prt_printf(out, "nr_keys:\t%lu\n", atomic_long_read(&c->nr_keys));
prt_printf(out, "nr_dirty:\t%lu\n", atomic_long_read(&c->nr_dirty));
struct bch_fs *c = container_of(bc, struct bch_fs, btree_key_cache);
printbuf_tabstop_push(out, 24);
printbuf_tabstop_push(out, 12);
unsigned flags = memalloc_nofs_save();
mutex_lock(&bc->lock);
prt_printf(out, "keys:\t%lu\r\n", atomic_long_read(&bc->nr_keys));
prt_printf(out, "dirty:\t%lu\r\n", atomic_long_read(&bc->nr_dirty));
prt_printf(out, "freelist:\t%lu\r\n", atomic_long_read(&bc->nr_freed));
prt_printf(out, "nonpcpu freelist:\t%lu\r\n", bc->nr_freed_nonpcpu);
prt_printf(out, "pcpu freelist:\t%lu\r\n", bc->nr_freed_pcpu);
prt_printf(out, "\nshrinker:\n");
prt_printf(out, "requested_to_free:\t%lu\r\n", bc->requested_to_free);
prt_printf(out, "freed:\t%lu\r\n", bc->freed);
prt_printf(out, "moved_to_freelist:\t%lu\r\n", bc->moved_to_freelist);
prt_printf(out, "skipped_dirty:\t%lu\r\n", bc->skipped_dirty);
prt_printf(out, "skipped_accessed:\t%lu\r\n", bc->skipped_accessed);
prt_printf(out, "skipped_lock_fail:\t%lu\r\n", bc->skipped_lock_fail);
prt_printf(out, "srcu seq:\t%lu\r\n", get_state_synchronize_srcu(&c->btree_trans_barrier));
struct bkey_cached *ck;
unsigned iter = 0;
list_for_each_entry(ck, &bc->freed_nonpcpu, list) {
prt_printf(out, "freed_nonpcpu:\t%lu\r\n", ck->btree_trans_barrier_seq);
if (++iter > 10)
break;
}
iter = 0;
list_for_each_entry(ck, &bc->freed_pcpu, list) {
prt_printf(out, "freed_pcpu:\t%lu\r\n", ck->btree_trans_barrier_seq);
if (++iter > 10)
break;
}
mutex_unlock(&bc->lock);
memalloc_flags_restore(flags);
}
void bch2_btree_key_cache_exit(void)
......
......@@ -24,6 +24,14 @@ struct btree_key_cache {
atomic_long_t nr_freed;
atomic_long_t nr_keys;
atomic_long_t nr_dirty;
/* shrinker stats */
unsigned long requested_to_free;
unsigned long freed;
unsigned long moved_to_freelist;
unsigned long skipped_dirty;
unsigned long skipped_accessed;
unsigned long skipped_lock_fail;
};
struct bkey_cached_key {
......
......@@ -140,8 +140,8 @@ write_attribute(trigger_gc);
write_attribute(trigger_discards);
write_attribute(trigger_invalidates);
write_attribute(trigger_journal_flush);
write_attribute(prune_cache);
write_attribute(btree_wakeup);
write_attribute(trigger_btree_cache_shrink);
write_attribute(trigger_btree_key_cache_shrink);
rw_attribute(gc_gens_pos);
read_attribute(uuid);
......@@ -346,21 +346,6 @@ static void bch2_gc_gens_pos_to_text(struct printbuf *out, struct bch_fs *c)
prt_printf(out, "\n");
}
static void bch2_btree_wakeup_all(struct bch_fs *c)
{
struct btree_trans *trans;
seqmutex_lock(&c->btree_trans_lock);
list_for_each_entry(trans, &c->btree_trans_list, list) {
struct btree_bkey_cached_common *b = READ_ONCE(trans->locking);
if (b)
six_lock_wakeup_all(&b->lock);
}
seqmutex_unlock(&c->btree_trans_lock);
}
static void fs_alloc_debug_to_text(struct printbuf *out, struct bch_fs *c)
{
unsigned nr[BCH_DATA_NR];
......@@ -513,7 +498,7 @@ STORE(bch2_fs)
if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_sysfs))
return -EROFS;
if (attr == &sysfs_prune_cache) {
if (attr == &sysfs_trigger_btree_cache_shrink) {
struct shrink_control sc;
sc.gfp_mask = GFP_KERNEL;
......@@ -521,8 +506,13 @@ STORE(bch2_fs)
c->btree_cache.shrink->scan_objects(c->btree_cache.shrink, &sc);
}
if (attr == &sysfs_btree_wakeup)
bch2_btree_wakeup_all(c);
if (attr == &sysfs_trigger_btree_key_cache_shrink) {
struct shrink_control sc;
sc.gfp_mask = GFP_KERNEL;
sc.nr_to_scan = strtoul_or_return(buf);
c->btree_key_cache.shrink->scan_objects(c->btree_cache.shrink, &sc);
}
if (attr == &sysfs_trigger_gc)
bch2_gc_gens(c);
......@@ -656,8 +646,8 @@ struct attribute *bch2_fs_internal_files[] = {
&sysfs_trigger_discards,
&sysfs_trigger_invalidates,
&sysfs_trigger_journal_flush,
&sysfs_prune_cache,
&sysfs_btree_wakeup,
&sysfs_trigger_btree_cache_shrink,
&sysfs_trigger_btree_key_cache_shrink,
&sysfs_gc_gens_pos,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment