Commit 0a63b66d authored by Kent Overstreet's avatar Kent Overstreet

bcache: Rework btree cache reserve handling

This changes the bucket allocation reserves to use _real_ reserves - separate
freelists - instead of watermarks, which if nothing else makes the current code
saner to reason about and is going to be important in the future when we add
support for multiple btrees.

It also adds btree_check_reserve(), which checks (and locks) the reserves for
both bucket allocation and memory allocation for btree nodes; the old code just
kinda sorta assumed that since (e.g. for btree node splits) it had the root
locked and that meant no other threads could try to make use of the same
reserve; this technically should have been ok for memory allocation (we should
always have a reserve for memory allocation (the btree node cache is used as a
reserve and we preallocate it)), but multiple btrees will mean that locking the
root won't be sufficient anymore, and for the bucket allocation reserve it was
technically possible for the old code to deadlock.
Signed-off-by: default avatarKent Overstreet <kmo@daterainc.com>
parent 56b30770
...@@ -375,6 +375,7 @@ static int bch_allocator_thread(void *arg) ...@@ -375,6 +375,7 @@ static int bch_allocator_thread(void *arg)
} }
allocator_wait(ca, bch_allocator_push(ca, bucket)); allocator_wait(ca, bch_allocator_push(ca, bucket));
wake_up(&ca->set->btree_cache_wait);
wake_up(&ca->set->bucket_wait); wake_up(&ca->set->bucket_wait);
} }
...@@ -717,25 +718,3 @@ int bch_cache_allocator_start(struct cache *ca) ...@@ -717,25 +718,3 @@ int bch_cache_allocator_start(struct cache *ca)
ca->alloc_thread = k; ca->alloc_thread = k;
return 0; return 0;
} }
int bch_cache_allocator_init(struct cache *ca)
{
/*
* Reserve:
* Prio/gen writes first
* Then 8 for btree allocations
* Then half for the moving garbage collector
*/
#if 0
ca->watermark[WATERMARK_PRIO] = 0;
ca->watermark[WATERMARK_METADATA] = prio_buckets(ca);
ca->watermark[WATERMARK_MOVINGGC] = 8 +
ca->watermark[WATERMARK_METADATA];
ca->watermark[WATERMARK_NONE] = ca->free.size / 2 +
ca->watermark[WATERMARK_MOVINGGC];
#endif
return 0;
}
...@@ -562,19 +562,16 @@ struct cache_set { ...@@ -562,19 +562,16 @@ struct cache_set {
struct list_head btree_cache_freed; struct list_head btree_cache_freed;
/* Number of elements in btree_cache + btree_cache_freeable lists */ /* Number of elements in btree_cache + btree_cache_freeable lists */
unsigned bucket_cache_used; unsigned btree_cache_used;
/* /*
* If we need to allocate memory for a new btree node and that * If we need to allocate memory for a new btree node and that
* allocation fails, we can cannibalize another node in the btree cache * allocation fails, we can cannibalize another node in the btree cache
* to satisfy the allocation. However, only one thread can be doing this * to satisfy the allocation - lock to guarantee only one thread does
* at a time, for obvious reasons - try_harder and try_wait are * this at a time:
* basically a lock for this that we can wait on asynchronously. The
* btree_root() macro releases the lock when it returns.
*/ */
struct task_struct *try_harder; wait_queue_head_t btree_cache_wait;
wait_queue_head_t try_wait; struct task_struct *btree_cache_alloc_lock;
uint64_t try_harder_start;
/* /*
* When we free a btree node, we increment the gen of the bucket the * When we free a btree node, we increment the gen of the bucket the
...@@ -669,7 +666,6 @@ struct cache_set { ...@@ -669,7 +666,6 @@ struct cache_set {
struct time_stats btree_gc_time; struct time_stats btree_gc_time;
struct time_stats btree_split_time; struct time_stats btree_split_time;
struct time_stats btree_read_time; struct time_stats btree_read_time;
struct time_stats try_harder_time;
atomic_long_t cache_read_races; atomic_long_t cache_read_races;
atomic_long_t writeback_keys_done; atomic_long_t writeback_keys_done;
...@@ -956,7 +952,6 @@ int bch_open_buckets_alloc(struct cache_set *); ...@@ -956,7 +952,6 @@ int bch_open_buckets_alloc(struct cache_set *);
void bch_open_buckets_free(struct cache_set *); void bch_open_buckets_free(struct cache_set *);
int bch_cache_allocator_start(struct cache *ca); int bch_cache_allocator_start(struct cache *ca);
int bch_cache_allocator_init(struct cache *ca);
void bch_debug_exit(void); void bch_debug_exit(void);
int bch_debug_init(struct kobject *); int bch_debug_init(struct kobject *);
......
This diff is collapsed.
...@@ -242,8 +242,9 @@ void __bch_btree_node_write(struct btree *, struct closure *); ...@@ -242,8 +242,9 @@ void __bch_btree_node_write(struct btree *, struct closure *);
void bch_btree_node_write(struct btree *, struct closure *); void bch_btree_node_write(struct btree *, struct closure *);
void bch_btree_set_root(struct btree *); void bch_btree_set_root(struct btree *);
struct btree *bch_btree_node_alloc(struct cache_set *, int, bool); struct btree *bch_btree_node_alloc(struct cache_set *, struct btree_op *, int);
struct btree *bch_btree_node_get(struct cache_set *, struct bkey *, int, bool); struct btree *bch_btree_node_get(struct cache_set *, struct btree_op *,
struct bkey *, int, bool);
int bch_btree_insert_check_key(struct btree *, struct btree_op *, int bch_btree_insert_check_key(struct btree *, struct btree_op *,
struct bkey *); struct bkey *);
......
...@@ -1495,14 +1495,13 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb) ...@@ -1495,14 +1495,13 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
sema_init(&c->sb_write_mutex, 1); sema_init(&c->sb_write_mutex, 1);
mutex_init(&c->bucket_lock); mutex_init(&c->bucket_lock);
init_waitqueue_head(&c->try_wait); init_waitqueue_head(&c->btree_cache_wait);
init_waitqueue_head(&c->bucket_wait); init_waitqueue_head(&c->bucket_wait);
sema_init(&c->uuid_write_mutex, 1); sema_init(&c->uuid_write_mutex, 1);
spin_lock_init(&c->btree_gc_time.lock); spin_lock_init(&c->btree_gc_time.lock);
spin_lock_init(&c->btree_split_time.lock); spin_lock_init(&c->btree_split_time.lock);
spin_lock_init(&c->btree_read_time.lock); spin_lock_init(&c->btree_read_time.lock);
spin_lock_init(&c->try_harder_time.lock);
bch_moving_init_cache_set(c); bch_moving_init_cache_set(c);
...@@ -1591,7 +1590,7 @@ static void run_cache_set(struct cache_set *c) ...@@ -1591,7 +1590,7 @@ static void run_cache_set(struct cache_set *c)
goto err; goto err;
err = "error reading btree root"; err = "error reading btree root";
c->root = bch_btree_node_get(c, k, j->btree_level, true); c->root = bch_btree_node_get(c, NULL, k, j->btree_level, true);
if (IS_ERR_OR_NULL(c->root)) if (IS_ERR_OR_NULL(c->root))
goto err; goto err;
...@@ -1666,7 +1665,7 @@ static void run_cache_set(struct cache_set *c) ...@@ -1666,7 +1665,7 @@ static void run_cache_set(struct cache_set *c)
goto err; goto err;
err = "cannot allocate new btree root"; err = "cannot allocate new btree root";
c->root = bch_btree_node_alloc(c, 0, true); c->root = bch_btree_node_alloc(c, NULL, 0);
if (IS_ERR_OR_NULL(c->root)) if (IS_ERR_OR_NULL(c->root))
goto err; goto err;
...@@ -1847,13 +1846,7 @@ static int cache_alloc(struct cache_sb *sb, struct cache *ca) ...@@ -1847,13 +1846,7 @@ static int cache_alloc(struct cache_sb *sb, struct cache *ca)
for_each_bucket(b, ca) for_each_bucket(b, ca)
atomic_set(&b->pin, 0); atomic_set(&b->pin, 0);
if (bch_cache_allocator_init(ca))
goto err;
return 0; return 0;
err:
kobject_put(&ca->kobj);
return -ENOMEM;
} }
static void register_cache(struct cache_sb *sb, struct page *sb_page, static void register_cache(struct cache_sb *sb, struct page *sb_page,
......
...@@ -54,7 +54,6 @@ sysfs_time_stats_attribute(btree_gc, sec, ms); ...@@ -54,7 +54,6 @@ sysfs_time_stats_attribute(btree_gc, sec, ms);
sysfs_time_stats_attribute(btree_split, sec, us); sysfs_time_stats_attribute(btree_split, sec, us);
sysfs_time_stats_attribute(btree_sort, ms, us); sysfs_time_stats_attribute(btree_sort, ms, us);
sysfs_time_stats_attribute(btree_read, ms, us); sysfs_time_stats_attribute(btree_read, ms, us);
sysfs_time_stats_attribute(try_harder, ms, us);
read_attribute(btree_nodes); read_attribute(btree_nodes);
read_attribute(btree_used_percent); read_attribute(btree_used_percent);
...@@ -534,7 +533,6 @@ SHOW(__bch_cache_set) ...@@ -534,7 +533,6 @@ SHOW(__bch_cache_set)
sysfs_print_time_stats(&c->btree_split_time, btree_split, sec, us); sysfs_print_time_stats(&c->btree_split_time, btree_split, sec, us);
sysfs_print_time_stats(&c->sort.time, btree_sort, ms, us); sysfs_print_time_stats(&c->sort.time, btree_sort, ms, us);
sysfs_print_time_stats(&c->btree_read_time, btree_read, ms, us); sysfs_print_time_stats(&c->btree_read_time, btree_read, ms, us);
sysfs_print_time_stats(&c->try_harder_time, try_harder, ms, us);
sysfs_print(btree_used_percent, btree_used(c)); sysfs_print(btree_used_percent, btree_used(c));
sysfs_print(btree_nodes, c->gc_stats.nodes); sysfs_print(btree_nodes, c->gc_stats.nodes);
...@@ -709,7 +707,6 @@ static struct attribute *bch_cache_set_internal_files[] = { ...@@ -709,7 +707,6 @@ static struct attribute *bch_cache_set_internal_files[] = {
sysfs_time_stats_attribute_list(btree_split, sec, us) sysfs_time_stats_attribute_list(btree_split, sec, us)
sysfs_time_stats_attribute_list(btree_sort, ms, us) sysfs_time_stats_attribute_list(btree_sort, ms, us)
sysfs_time_stats_attribute_list(btree_read, ms, us) sysfs_time_stats_attribute_list(btree_read, ms, us)
sysfs_time_stats_attribute_list(try_harder, ms, us)
&sysfs_btree_nodes, &sysfs_btree_nodes,
&sysfs_btree_used_percent, &sysfs_btree_used_percent,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment