Commit bceacfa9 authored by Daniel Hill, committed by Kent Overstreet

bcachefs: add counters for failed shrinker reclaim

This adds distinct counters for every reason the btree node shrinker can
fail to free an object - if our shrinker isn't making progress, this
will tell us why.
Signed-off-by: Daniel Hill <daniel@gluo.nz>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent 692aa7a5
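
The counters are bumped through a small conditional-increment macro, BTREE_CACHE_NOT_FREED_INCREMENT, added in the first hunk below. For readers unfamiliar with the pattern, here is a minimal standalone sketch of the same idea; the struct and names are hypothetical stand-ins rather than bcachefs code, and, like the kernel macro, it deliberately reads shrinker_counter from the caller's scope:

    #include <stdio.h>

    /* Hypothetical stand-in for the not_freed_* fields in struct btree_cache */
    struct cache_stats {
            unsigned not_freed_dirty;
            unsigned not_freed_lock_intent;
    };

    /*
     * Token pasting (##) turns the counter name into a field access, and the
     * do/while (0) wrapper makes the macro usable as a single statement.
     * 'shrinker_counter' is picked up from the caller's scope, so counters
     * are only bumped when the caller asked for shrinker accounting.
     */
    #define NOT_FREED_INCREMENT(stats, counter)         \
    do {                                                \
            if (shrinker_counter)                       \
                    (stats)->not_freed_##counter++;     \
    } while (0)

    int main(void)
    {
            struct cache_stats stats = {0};
            int shrinker_counter = 1;  /* pretend we were called from the shrinker */

            NOT_FREED_INCREMENT(&stats, dirty);
            NOT_FREED_INCREMENT(&stats, lock_intent);

            printf("dirty %u, lock intent %u\n",
                   stats.not_freed_dirty, stats.not_freed_lock_intent);
            return 0;
    }
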
fs/bcachefs/btree_cache.c

@@ -16,6 +16,12 @@
 #include <linux/prefetch.h>
 #include <linux/sched/mm.h>
 
+#define BTREE_CACHE_NOT_FREED_INCREMENT(counter) \
+do { \
+        if (shrinker_counter) \
+                bc->not_freed_##counter++; \
+} while (0)
+
 const char * const bch2_btree_node_flags[] = {
 #define x(f)    #f,
         BTREE_FLAGS()
@@ -238,7 +244,7 @@ static inline struct btree *btree_cache_find(struct btree_cache *bc,
  * this version is for btree nodes that have already been freed (we're not
  * reaping a real btree node)
  */
-static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush)
+static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush, bool shrinker_counter)
 {
         struct btree_cache *bc = &c->btree_cache;
         int ret = 0;
@@ -260,38 +266,64 @@ static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush)
         if (b->flags & ((1U << BTREE_NODE_dirty)|
                         (1U << BTREE_NODE_read_in_flight)|
                         (1U << BTREE_NODE_write_in_flight))) {
-                if (!flush)
+                if (!flush) {
+                        if (btree_node_dirty(b))
+                                BTREE_CACHE_NOT_FREED_INCREMENT(dirty);
+                        else if (btree_node_read_in_flight(b))
+                                BTREE_CACHE_NOT_FREED_INCREMENT(read_in_flight);
+                        else if (btree_node_write_in_flight(b))
+                                BTREE_CACHE_NOT_FREED_INCREMENT(write_in_flight);
                         return -BCH_ERR_ENOMEM_btree_node_reclaim;
+                }
 
                 /* XXX: waiting on IO with btree cache lock held */
                 bch2_btree_node_wait_on_read(b);
                 bch2_btree_node_wait_on_write(b);
         }
 
-        if (!six_trylock_intent(&b->c.lock))
+        if (!six_trylock_intent(&b->c.lock)) {
+                BTREE_CACHE_NOT_FREED_INCREMENT(lock_intent);
                 return -BCH_ERR_ENOMEM_btree_node_reclaim;
+        }
 
-        if (!six_trylock_write(&b->c.lock))
+        if (!six_trylock_write(&b->c.lock)) {
+                BTREE_CACHE_NOT_FREED_INCREMENT(lock_write);
                 goto out_unlock_intent;
+        }
 
         /* recheck under lock */
         if (b->flags & ((1U << BTREE_NODE_read_in_flight)|
                         (1U << BTREE_NODE_write_in_flight))) {
-                if (!flush)
+                if (!flush) {
+                        if (btree_node_read_in_flight(b))
+                                BTREE_CACHE_NOT_FREED_INCREMENT(read_in_flight);
+                        else if (btree_node_write_in_flight(b))
+                                BTREE_CACHE_NOT_FREED_INCREMENT(write_in_flight);
                         goto out_unlock;
+                }
                 six_unlock_write(&b->c.lock);
                 six_unlock_intent(&b->c.lock);
                 goto wait_on_io;
         }
 
-        if (btree_node_noevict(b) ||
-            btree_node_write_blocked(b) ||
-            btree_node_will_make_reachable(b))
+        if (btree_node_noevict(b)) {
+                BTREE_CACHE_NOT_FREED_INCREMENT(noevict);
+                goto out_unlock;
+        }
+        if (btree_node_write_blocked(b)) {
+                BTREE_CACHE_NOT_FREED_INCREMENT(write_blocked);
                 goto out_unlock;
+        }
+        if (btree_node_will_make_reachable(b)) {
+                BTREE_CACHE_NOT_FREED_INCREMENT(will_make_reachable);
+                goto out_unlock;
+        }
 
         if (btree_node_dirty(b)) {
-                if (!flush)
+                if (!flush) {
+                        BTREE_CACHE_NOT_FREED_INCREMENT(dirty);
                         goto out_unlock;
+                }
                 /*
                  * Using the underscore version because we don't want to compact
                  * bsets after the write, since this node is about to be evicted
@@ -321,14 +353,14 @@ static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush)
         goto out;
 }
 
-static int btree_node_reclaim(struct bch_fs *c, struct btree *b)
+static int btree_node_reclaim(struct bch_fs *c, struct btree *b, bool shrinker_counter)
 {
-        return __btree_node_reclaim(c, b, false);
+        return __btree_node_reclaim(c, b, false, shrinker_counter);
 }
 
 static int btree_node_write_and_reclaim(struct bch_fs *c, struct btree *b)
 {
-        return __btree_node_reclaim(c, b, true);
+        return __btree_node_reclaim(c, b, true, false);
 }
 
 static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
@@ -376,11 +408,12 @@ static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
                 if (touched >= nr)
                         goto out;
 
-                if (!btree_node_reclaim(c, b)) {
+                if (!btree_node_reclaim(c, b, true)) {
                         btree_node_data_free(c, b);
                         six_unlock_write(&b->c.lock);
                         six_unlock_intent(&b->c.lock);
                         freed++;
+                        bc->freed++;
                 }
         }
 restart:
@@ -389,9 +422,11 @@ static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
 
                 if (btree_node_accessed(b)) {
                         clear_btree_node_accessed(b);
-                } else if (!btree_node_reclaim(c, b)) {
+                        bc->not_freed_access_bit++;
+                } else if (!btree_node_reclaim(c, b, true)) {
                         freed++;
                         btree_node_data_free(c, b);
+                        bc->freed++;
 
                         bch2_btree_node_hash_remove(bc, b);
                         six_unlock_write(&b->c.lock);
@@ -599,7 +634,7 @@ static struct btree *btree_node_cannibalize(struct bch_fs *c)
         struct btree *b;
 
         list_for_each_entry_reverse(b, &bc->live, list)
-                if (!btree_node_reclaim(c, b))
+                if (!btree_node_reclaim(c, b, false))
                         return b;
 
         while (1) {
@@ -635,7 +670,7 @@ struct btree *bch2_btree_node_mem_alloc(struct btree_trans *trans, bool pcpu_read_locks)
          * disk node. Check the freed list before allocating a new one:
          */
         list_for_each_entry(b, freed, list)
-                if (!btree_node_reclaim(c, b)) {
+                if (!btree_node_reclaim(c, b, false)) {
                         list_del_init(&b->list);
                         goto got_node;
                 }
@@ -661,7 +696,7 @@ struct btree *bch2_btree_node_mem_alloc(struct btree_trans *trans, bool pcpu_read_locks)
          * the list. Check if there's any freed nodes there:
          */
         list_for_each_entry(b2, &bc->freeable, list)
-                if (!btree_node_reclaim(c, b2)) {
+                if (!btree_node_reclaim(c, b2, false)) {
                         swap(b->data, b2->data);
                         swap(b->aux_data, b2->aux_data);
                         btree_node_to_freedlist(bc, b2);
@@ -1280,12 +1315,12 @@ static void prt_btree_cache_line(struct printbuf *out, const struct bch_fs *c,
         prt_printf(out, " (%u)\n", nr);
 }
 
-void bch2_btree_cache_to_text(struct printbuf *out, const struct bch_fs *c)
+void bch2_btree_cache_to_text(struct printbuf *out, const struct btree_cache *bc)
 {
-        const struct btree_cache *bc = &c->btree_cache;
+        struct bch_fs *c = container_of(bc, struct bch_fs, btree_cache);
 
         if (!out->nr_tabstops)
-                printbuf_tabstop_push(out, 24);
+                printbuf_tabstop_push(out, 32);
 
         prt_btree_cache_line(out, c, "total:", bc->used);
         prt_btree_cache_line(out, c, "nr dirty:", atomic_read(&bc->dirty));
@@ -1294,4 +1329,17 @@ void bch2_btree_cache_to_text(struct printbuf *out, const struct bch_fs *c)
         for (unsigned i = 0; i < ARRAY_SIZE(bc->used_by_btree); i++)
                 prt_btree_cache_line(out, c, bch2_btree_id_str(i), bc->used_by_btree[i]);
 
+        prt_newline(out);
+        prt_printf(out, "freed:\t%u\n", bc->freed);
+        prt_printf(out, "not freed:\n");
+        prt_printf(out, " dirty\t%u\n", bc->not_freed_dirty);
+        prt_printf(out, " write in flight\t%u\n", bc->not_freed_write_in_flight);
+        prt_printf(out, " read in flight\t%u\n", bc->not_freed_read_in_flight);
+        prt_printf(out, " lock intent failed\t%u\n", bc->not_freed_lock_intent);
+        prt_printf(out, " lock write failed\t%u\n", bc->not_freed_lock_write);
+        prt_printf(out, " access bit\t%u\n", bc->not_freed_access_bit);
+        prt_printf(out, " no evict failed\t%u\n", bc->not_freed_noevict);
+        prt_printf(out, " write blocked\t%u\n", bc->not_freed_write_blocked);
+        prt_printf(out, " will make reachable\t%u\n", bc->not_freed_will_make_reachable);
 }
fs/bcachefs/btree_cache.h

@@ -134,6 +134,6 @@ static inline struct btree *btree_node_root(struct bch_fs *c, struct btree *b)
 const char *bch2_btree_id_str(enum btree_id);
 void bch2_btree_pos_to_text(struct printbuf *, struct bch_fs *, const struct btree *);
 void bch2_btree_node_to_text(struct printbuf *, struct bch_fs *, const struct btree *);
-void bch2_btree_cache_to_text(struct printbuf *, const struct bch_fs *);
+void bch2_btree_cache_to_text(struct printbuf *, const struct btree_cache *);
 
 #endif /* _BCACHEFS_BTREE_CACHE_H */
fs/bcachefs/btree_types.h

@@ -163,6 +163,16 @@ struct btree_cache {
         /* Number of elements in live + freeable lists */
         unsigned                used;
         unsigned                reserve;
+        unsigned                freed;
+        unsigned                not_freed_lock_intent;
+        unsigned                not_freed_lock_write;
+        unsigned                not_freed_dirty;
+        unsigned                not_freed_read_in_flight;
+        unsigned                not_freed_write_in_flight;
+        unsigned                not_freed_noevict;
+        unsigned                not_freed_write_blocked;
+        unsigned                not_freed_will_make_reachable;
+        unsigned                not_freed_access_bit;
         atomic_t                dirty;
         struct shrinker         *shrink;
fs/bcachefs/sysfs.c

@@ -383,7 +383,7 @@ SHOW(bch2_fs)
                 bch2_journal_debug_to_text(out, &c->journal);
 
         if (attr == &sysfs_btree_cache)
-                bch2_btree_cache_to_text(out, c);
+                bch2_btree_cache_to_text(out, &c->btree_cache);
 
         if (attr == &sysfs_btree_key_cache)
                 bch2_btree_key_cache_to_text(out, &c->btree_key_cache);
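
With this applied, the btree_cache sysfs attribute rendered by bch2_btree_cache_to_text() ends with the new breakdown (often read from /sys/fs/bcachefs/<uuid>/internal/btree_cache, though the exact location varies by version). An illustrative reading, with made-up values, aligned by the 32-column tabstop pushed above:

    freed:                          417
    not freed:
     dirty                          23
     write in flight                2
     read in flight                 0
     lock intent failed             5
     lock write failed              1
     access bit                     180
     no evict failed                0
     write blocked                  3
     will make reachable            1

A consistently dominant counter points at the bottleneck: for example, a large "dirty" count suggests reclaim is stuck behind writeback, while large trylock-failure counts suggest lock contention.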