Commit e84987a1 authored by Jens Axboe

Merge branch 'bcache-for-3.15' of git://evilpiepirate.org/~kent/linux-bcache into for-3.15/drivers

Kent writes:

Jens, here's the bcache changes for 3.15. Lots of bugfixes, and some
refactoring and cleanups.
parents 5eb9291c cb851149
...@@ -24,11 +24,3 @@ config BCACHE_CLOSURES_DEBUG ...@@ -24,11 +24,3 @@ config BCACHE_CLOSURES_DEBUG
Keeps all active closures in a linked list and provides a debugfs Keeps all active closures in a linked list and provides a debugfs
interface to list them, which makes it possible to see asynchronous interface to list them, which makes it possible to see asynchronous
operations that get stuck. operations that get stuck.
# cgroup code needs to be updated:
#
#config CGROUP_BCACHE
# bool "Cgroup controls for bcache"
# depends on BCACHE && BLK_CGROUP
# ---help---
# TODO
...@@ -78,12 +78,6 @@ uint8_t bch_inc_gen(struct cache *ca, struct bucket *b) ...@@ -78,12 +78,6 @@ uint8_t bch_inc_gen(struct cache *ca, struct bucket *b)
ca->set->need_gc = max(ca->set->need_gc, bucket_gc_gen(b)); ca->set->need_gc = max(ca->set->need_gc, bucket_gc_gen(b));
WARN_ON_ONCE(ca->set->need_gc > BUCKET_GC_GEN_MAX); WARN_ON_ONCE(ca->set->need_gc > BUCKET_GC_GEN_MAX);
if (CACHE_SYNC(&ca->set->sb)) {
ca->need_save_prio = max(ca->need_save_prio,
bucket_disk_gen(b));
WARN_ON_ONCE(ca->need_save_prio > BUCKET_DISK_GEN_MAX);
}
return ret; return ret;
} }
...@@ -120,51 +114,45 @@ void bch_rescale_priorities(struct cache_set *c, int sectors) ...@@ -120,51 +114,45 @@ void bch_rescale_priorities(struct cache_set *c, int sectors)
mutex_unlock(&c->bucket_lock); mutex_unlock(&c->bucket_lock);
} }
/* Allocation */ /*
* Background allocation thread: scans for buckets to be invalidated,
* invalidates them, rewrites prios/gens (marking them as invalidated on disk),
* then optionally issues discard commands to the newly free buckets, then puts
* them on the various freelists.
*/
static inline bool can_inc_bucket_gen(struct bucket *b) static inline bool can_inc_bucket_gen(struct bucket *b)
{ {
return bucket_gc_gen(b) < BUCKET_GC_GEN_MAX && return bucket_gc_gen(b) < BUCKET_GC_GEN_MAX;
bucket_disk_gen(b) < BUCKET_DISK_GEN_MAX;
} }
bool bch_bucket_add_unused(struct cache *ca, struct bucket *b) bool bch_can_invalidate_bucket(struct cache *ca, struct bucket *b)
{ {
BUG_ON(GC_MARK(b) || GC_SECTORS_USED(b)); BUG_ON(!ca->set->gc_mark_valid);
if (CACHE_REPLACEMENT(&ca->sb) == CACHE_REPLACEMENT_FIFO) {
unsigned i;
for (i = 0; i < RESERVE_NONE; i++)
if (!fifo_full(&ca->free[i]))
goto add;
return false;
}
add:
b->prio = 0;
if (can_inc_bucket_gen(b) &&
fifo_push(&ca->unused, b - ca->buckets)) {
atomic_inc(&b->pin);
return true;
}
return false; return (!GC_MARK(b) ||
} GC_MARK(b) == GC_MARK_RECLAIMABLE) &&
static bool can_invalidate_bucket(struct cache *ca, struct bucket *b)
{
return GC_MARK(b) == GC_MARK_RECLAIMABLE &&
!atomic_read(&b->pin) && !atomic_read(&b->pin) &&
can_inc_bucket_gen(b); can_inc_bucket_gen(b);
} }
static void invalidate_one_bucket(struct cache *ca, struct bucket *b) void __bch_invalidate_one_bucket(struct cache *ca, struct bucket *b)
{ {
lockdep_assert_held(&ca->set->bucket_lock);
BUG_ON(GC_MARK(b) && GC_MARK(b) != GC_MARK_RECLAIMABLE);
if (GC_SECTORS_USED(b))
trace_bcache_invalidate(ca, b - ca->buckets);
bch_inc_gen(ca, b); bch_inc_gen(ca, b);
b->prio = INITIAL_PRIO; b->prio = INITIAL_PRIO;
atomic_inc(&b->pin); atomic_inc(&b->pin);
}
static void bch_invalidate_one_bucket(struct cache *ca, struct bucket *b)
{
__bch_invalidate_one_bucket(ca, b);
fifo_push(&ca->free_inc, b - ca->buckets); fifo_push(&ca->free_inc, b - ca->buckets);
} }
...@@ -195,20 +183,7 @@ static void invalidate_buckets_lru(struct cache *ca) ...@@ -195,20 +183,7 @@ static void invalidate_buckets_lru(struct cache *ca)
ca->heap.used = 0; ca->heap.used = 0;
for_each_bucket(b, ca) { for_each_bucket(b, ca) {
/* if (!bch_can_invalidate_bucket(ca, b))
* If we fill up the unused list, if we then return before
* adding anything to the free_inc list we'll skip writing
* prios/gens and just go back to allocating from the unused
* list:
*/
if (fifo_full(&ca->unused))
return;
if (!can_invalidate_bucket(ca, b))
continue;
if (!GC_SECTORS_USED(b) &&
bch_bucket_add_unused(ca, b))
continue; continue;
if (!heap_full(&ca->heap)) if (!heap_full(&ca->heap))
...@@ -233,7 +208,7 @@ static void invalidate_buckets_lru(struct cache *ca) ...@@ -233,7 +208,7 @@ static void invalidate_buckets_lru(struct cache *ca)
return; return;
} }
invalidate_one_bucket(ca, b); bch_invalidate_one_bucket(ca, b);
} }
} }
...@@ -249,8 +224,8 @@ static void invalidate_buckets_fifo(struct cache *ca) ...@@ -249,8 +224,8 @@ static void invalidate_buckets_fifo(struct cache *ca)
b = ca->buckets + ca->fifo_last_bucket++; b = ca->buckets + ca->fifo_last_bucket++;
if (can_invalidate_bucket(ca, b)) if (bch_can_invalidate_bucket(ca, b))
invalidate_one_bucket(ca, b); bch_invalidate_one_bucket(ca, b);
if (++checked >= ca->sb.nbuckets) { if (++checked >= ca->sb.nbuckets) {
ca->invalidate_needs_gc = 1; ca->invalidate_needs_gc = 1;
...@@ -274,8 +249,8 @@ static void invalidate_buckets_random(struct cache *ca) ...@@ -274,8 +249,8 @@ static void invalidate_buckets_random(struct cache *ca)
b = ca->buckets + n; b = ca->buckets + n;
if (can_invalidate_bucket(ca, b)) if (bch_can_invalidate_bucket(ca, b))
invalidate_one_bucket(ca, b); bch_invalidate_one_bucket(ca, b);
if (++checked >= ca->sb.nbuckets / 2) { if (++checked >= ca->sb.nbuckets / 2) {
ca->invalidate_needs_gc = 1; ca->invalidate_needs_gc = 1;
...@@ -287,8 +262,7 @@ static void invalidate_buckets_random(struct cache *ca) ...@@ -287,8 +262,7 @@ static void invalidate_buckets_random(struct cache *ca)
static void invalidate_buckets(struct cache *ca) static void invalidate_buckets(struct cache *ca)
{ {
if (ca->invalidate_needs_gc) BUG_ON(ca->invalidate_needs_gc);
return;
switch (CACHE_REPLACEMENT(&ca->sb)) { switch (CACHE_REPLACEMENT(&ca->sb)) {
case CACHE_REPLACEMENT_LRU: case CACHE_REPLACEMENT_LRU:
...@@ -301,8 +275,6 @@ static void invalidate_buckets(struct cache *ca) ...@@ -301,8 +275,6 @@ static void invalidate_buckets(struct cache *ca)
invalidate_buckets_random(ca); invalidate_buckets_random(ca);
break; break;
} }
trace_bcache_alloc_invalidate(ca);
} }
#define allocator_wait(ca, cond) \ #define allocator_wait(ca, cond) \
...@@ -350,17 +322,10 @@ static int bch_allocator_thread(void *arg) ...@@ -350,17 +322,10 @@ static int bch_allocator_thread(void *arg)
* possibly issue discards to them, then we add the bucket to * possibly issue discards to them, then we add the bucket to
* the free list: * the free list:
*/ */
while (1) { while (!fifo_empty(&ca->free_inc)) {
long bucket; long bucket;
if ((!atomic_read(&ca->set->prio_blocked) ||
!CACHE_SYNC(&ca->set->sb)) &&
!fifo_empty(&ca->unused))
fifo_pop(&ca->unused, bucket);
else if (!fifo_empty(&ca->free_inc))
fifo_pop(&ca->free_inc, bucket); fifo_pop(&ca->free_inc, bucket);
else
break;
if (ca->discard) { if (ca->discard) {
mutex_unlock(&ca->set->bucket_lock); mutex_unlock(&ca->set->bucket_lock);
...@@ -371,6 +336,7 @@ static int bch_allocator_thread(void *arg) ...@@ -371,6 +336,7 @@ static int bch_allocator_thread(void *arg)
} }
allocator_wait(ca, bch_allocator_push(ca, bucket)); allocator_wait(ca, bch_allocator_push(ca, bucket));
wake_up(&ca->set->btree_cache_wait);
wake_up(&ca->set->bucket_wait); wake_up(&ca->set->bucket_wait);
} }
...@@ -380,9 +346,9 @@ static int bch_allocator_thread(void *arg) ...@@ -380,9 +346,9 @@ static int bch_allocator_thread(void *arg)
* them to the free_inc list: * them to the free_inc list:
*/ */
retry_invalidate:
allocator_wait(ca, ca->set->gc_mark_valid && allocator_wait(ca, ca->set->gc_mark_valid &&
(ca->need_save_prio > 64 || !ca->invalidate_needs_gc);
!ca->invalidate_needs_gc));
invalidate_buckets(ca); invalidate_buckets(ca);
/* /*
...@@ -390,13 +356,28 @@ static int bch_allocator_thread(void *arg) ...@@ -390,13 +356,28 @@ static int bch_allocator_thread(void *arg)
* new stuff to them: * new stuff to them:
*/ */
allocator_wait(ca, !atomic_read(&ca->set->prio_blocked)); allocator_wait(ca, !atomic_read(&ca->set->prio_blocked));
if (CACHE_SYNC(&ca->set->sb) && if (CACHE_SYNC(&ca->set->sb)) {
(!fifo_empty(&ca->free_inc) || /*
ca->need_save_prio > 64)) * This could deadlock if an allocation with a btree
* node locked ever blocked - having the btree node
* locked would block garbage collection, but here we're
* waiting on garbage collection before we invalidate
* and free anything.
*
* But this should be safe since the btree code always
* uses btree_check_reserve() before allocating now, and
* if it fails it blocks without btree nodes locked.
*/
if (!fifo_full(&ca->free_inc))
goto retry_invalidate;
bch_prio_write(ca); bch_prio_write(ca);
} }
}
} }
/* Allocation */
long bch_bucket_alloc(struct cache *ca, unsigned reserve, bool wait) long bch_bucket_alloc(struct cache *ca, unsigned reserve, bool wait)
{ {
DEFINE_WAIT(w); DEFINE_WAIT(w);
...@@ -408,8 +389,10 @@ long bch_bucket_alloc(struct cache *ca, unsigned reserve, bool wait) ...@@ -408,8 +389,10 @@ long bch_bucket_alloc(struct cache *ca, unsigned reserve, bool wait)
fifo_pop(&ca->free[reserve], r)) fifo_pop(&ca->free[reserve], r))
goto out; goto out;
if (!wait) if (!wait) {
trace_bcache_alloc_fail(ca, reserve);
return -1; return -1;
}
do { do {
prepare_to_wait(&ca->set->bucket_wait, &w, prepare_to_wait(&ca->set->bucket_wait, &w,
...@@ -425,6 +408,8 @@ long bch_bucket_alloc(struct cache *ca, unsigned reserve, bool wait) ...@@ -425,6 +408,8 @@ long bch_bucket_alloc(struct cache *ca, unsigned reserve, bool wait)
out: out:
wake_up_process(ca->alloc_thread); wake_up_process(ca->alloc_thread);
trace_bcache_alloc(ca, reserve);
if (expensive_debug_checks(ca->set)) { if (expensive_debug_checks(ca->set)) {
size_t iter; size_t iter;
long i; long i;
...@@ -438,8 +423,6 @@ long bch_bucket_alloc(struct cache *ca, unsigned reserve, bool wait) ...@@ -438,8 +423,6 @@ long bch_bucket_alloc(struct cache *ca, unsigned reserve, bool wait)
BUG_ON(i == r); BUG_ON(i == r);
fifo_for_each(i, &ca->free_inc, iter) fifo_for_each(i, &ca->free_inc, iter)
BUG_ON(i == r); BUG_ON(i == r);
fifo_for_each(i, &ca->unused, iter)
BUG_ON(i == r);
} }
b = ca->buckets + r; b = ca->buckets + r;
...@@ -461,17 +444,19 @@ long bch_bucket_alloc(struct cache *ca, unsigned reserve, bool wait) ...@@ -461,17 +444,19 @@ long bch_bucket_alloc(struct cache *ca, unsigned reserve, bool wait)
return r; return r;
} }
void __bch_bucket_free(struct cache *ca, struct bucket *b)
{
SET_GC_MARK(b, 0);
SET_GC_SECTORS_USED(b, 0);
}
void bch_bucket_free(struct cache_set *c, struct bkey *k) void bch_bucket_free(struct cache_set *c, struct bkey *k)
{ {
unsigned i; unsigned i;
for (i = 0; i < KEY_PTRS(k); i++) { for (i = 0; i < KEY_PTRS(k); i++)
struct bucket *b = PTR_BUCKET(c, k, i); __bch_bucket_free(PTR_CACHE(c, k, i),
PTR_BUCKET(c, k, i));
SET_GC_MARK(b, GC_MARK_RECLAIMABLE);
SET_GC_SECTORS_USED(b, 0);
bch_bucket_add_unused(PTR_CACHE(c, k, i), b);
}
} }
int __bch_bucket_alloc_set(struct cache_set *c, unsigned reserve, int __bch_bucket_alloc_set(struct cache_set *c, unsigned reserve,
...@@ -709,25 +694,3 @@ int bch_cache_allocator_start(struct cache *ca) ...@@ -709,25 +694,3 @@ int bch_cache_allocator_start(struct cache *ca)
ca->alloc_thread = k; ca->alloc_thread = k;
return 0; return 0;
} }
int bch_cache_allocator_init(struct cache *ca)
{
/*
* Reserve:
* Prio/gen writes first
* Then 8 for btree allocations
* Then half for the moving garbage collector
*/
#if 0
ca->watermark[WATERMARK_PRIO] = 0;
ca->watermark[WATERMARK_METADATA] = prio_buckets(ca);
ca->watermark[WATERMARK_MOVINGGC] = 8 +
ca->watermark[WATERMARK_METADATA];
ca->watermark[WATERMARK_NONE] = ca->free.size / 2 +
ca->watermark[WATERMARK_MOVINGGC];
#endif
return 0;
}
...@@ -195,9 +195,7 @@ struct bucket { ...@@ -195,9 +195,7 @@ struct bucket {
atomic_t pin; atomic_t pin;
uint16_t prio; uint16_t prio;
uint8_t gen; uint8_t gen;
uint8_t disk_gen;
uint8_t last_gc; /* Most out of date gen in the btree */ uint8_t last_gc; /* Most out of date gen in the btree */
uint8_t gc_gen;
uint16_t gc_mark; /* Bitfield used by GC. See below for field */ uint16_t gc_mark; /* Bitfield used by GC. See below for field */
}; };
...@@ -207,9 +205,9 @@ struct bucket { ...@@ -207,9 +205,9 @@ struct bucket {
*/ */
BITMASK(GC_MARK, struct bucket, gc_mark, 0, 2); BITMASK(GC_MARK, struct bucket, gc_mark, 0, 2);
#define GC_MARK_RECLAIMABLE 0 #define GC_MARK_RECLAIMABLE 1
#define GC_MARK_DIRTY 1 #define GC_MARK_DIRTY 2
#define GC_MARK_METADATA 2 #define GC_MARK_METADATA 3
#define GC_SECTORS_USED_SIZE 13 #define GC_SECTORS_USED_SIZE 13
#define MAX_GC_SECTORS_USED (~(~0ULL << GC_SECTORS_USED_SIZE)) #define MAX_GC_SECTORS_USED (~(~0ULL << GC_SECTORS_USED_SIZE))
BITMASK(GC_SECTORS_USED, struct bucket, gc_mark, 2, GC_SECTORS_USED_SIZE); BITMASK(GC_SECTORS_USED, struct bucket, gc_mark, 2, GC_SECTORS_USED_SIZE);
...@@ -426,14 +424,9 @@ struct cache { ...@@ -426,14 +424,9 @@ struct cache {
* their new gen to disk. After prio_write() finishes writing the new * their new gen to disk. After prio_write() finishes writing the new
* gens/prios, they'll be moved to the free list (and possibly discarded * gens/prios, they'll be moved to the free list (and possibly discarded
* in the process) * in the process)
*
* unused: GC found nothing pointing into these buckets (possibly
* because all the data they contained was overwritten), so we only
* need to discard them before they can be moved to the free list.
*/ */
DECLARE_FIFO(long, free)[RESERVE_NR]; DECLARE_FIFO(long, free)[RESERVE_NR];
DECLARE_FIFO(long, free_inc); DECLARE_FIFO(long, free_inc);
DECLARE_FIFO(long, unused);
size_t fifo_last_bucket; size_t fifo_last_bucket;
...@@ -442,12 +435,6 @@ struct cache { ...@@ -442,12 +435,6 @@ struct cache {
DECLARE_HEAP(struct bucket *, heap); DECLARE_HEAP(struct bucket *, heap);
/*
* max(gen - disk_gen) for all buckets. When it gets too big we have to
* call prio_write() to keep gens from wrapping.
*/
uint8_t need_save_prio;
/* /*
* If nonzero, we know we aren't going to find any buckets to invalidate * If nonzero, we know we aren't going to find any buckets to invalidate
* until a gc finishes - otherwise we could pointlessly burn a ton of * until a gc finishes - otherwise we could pointlessly burn a ton of
...@@ -562,19 +549,16 @@ struct cache_set { ...@@ -562,19 +549,16 @@ struct cache_set {
struct list_head btree_cache_freed; struct list_head btree_cache_freed;
/* Number of elements in btree_cache + btree_cache_freeable lists */ /* Number of elements in btree_cache + btree_cache_freeable lists */
unsigned bucket_cache_used; unsigned btree_cache_used;
/* /*
* If we need to allocate memory for a new btree node and that * If we need to allocate memory for a new btree node and that
* allocation fails, we can cannibalize another node in the btree cache * allocation fails, we can cannibalize another node in the btree cache
* to satisfy the allocation. However, only one thread can be doing this * to satisfy the allocation - lock to guarantee only one thread does
* at a time, for obvious reasons - try_harder and try_wait are * this at a time:
* basically a lock for this that we can wait on asynchronously. The
* btree_root() macro releases the lock when it returns.
*/ */
struct task_struct *try_harder; wait_queue_head_t btree_cache_wait;
wait_queue_head_t try_wait; struct task_struct *btree_cache_alloc_lock;
uint64_t try_harder_start;
/* /*
* When we free a btree node, we increment the gen of the bucket the * When we free a btree node, we increment the gen of the bucket the
...@@ -603,7 +587,7 @@ struct cache_set { ...@@ -603,7 +587,7 @@ struct cache_set {
uint16_t min_prio; uint16_t min_prio;
/* /*
* max(gen - gc_gen) for all buckets. When it gets too big we have to gc * max(gen - last_gc) for all buckets. When it gets too big we have to gc
* to keep gens from wrapping around. * to keep gens from wrapping around.
*/ */
uint8_t need_gc; uint8_t need_gc;
...@@ -628,6 +612,8 @@ struct cache_set { ...@@ -628,6 +612,8 @@ struct cache_set {
/* Number of moving GC bios in flight */ /* Number of moving GC bios in flight */
struct semaphore moving_in_flight; struct semaphore moving_in_flight;
struct workqueue_struct *moving_gc_wq;
struct btree *root; struct btree *root;
#ifdef CONFIG_BCACHE_DEBUG #ifdef CONFIG_BCACHE_DEBUG
...@@ -667,7 +653,6 @@ struct cache_set { ...@@ -667,7 +653,6 @@ struct cache_set {
struct time_stats btree_gc_time; struct time_stats btree_gc_time;
struct time_stats btree_split_time; struct time_stats btree_split_time;
struct time_stats btree_read_time; struct time_stats btree_read_time;
struct time_stats try_harder_time;
atomic_long_t cache_read_races; atomic_long_t cache_read_races;
atomic_long_t writeback_keys_done; atomic_long_t writeback_keys_done;
...@@ -850,9 +835,6 @@ static inline bool cached_dev_get(struct cached_dev *dc) ...@@ -850,9 +835,6 @@ static inline bool cached_dev_get(struct cached_dev *dc)
/* /*
* bucket_gc_gen() returns the difference between the bucket's current gen and * bucket_gc_gen() returns the difference between the bucket's current gen and
* the oldest gen of any pointer into that bucket in the btree (last_gc). * the oldest gen of any pointer into that bucket in the btree (last_gc).
*
* bucket_disk_gen() returns the difference between the current gen and the gen
* on disk; they're both used to make sure gens don't wrap around.
*/ */
static inline uint8_t bucket_gc_gen(struct bucket *b) static inline uint8_t bucket_gc_gen(struct bucket *b)
...@@ -860,13 +842,7 @@ static inline uint8_t bucket_gc_gen(struct bucket *b) ...@@ -860,13 +842,7 @@ static inline uint8_t bucket_gc_gen(struct bucket *b)
return b->gen - b->last_gc; return b->gen - b->last_gc;
} }
static inline uint8_t bucket_disk_gen(struct bucket *b)
{
return b->gen - b->disk_gen;
}
#define BUCKET_GC_GEN_MAX 96U #define BUCKET_GC_GEN_MAX 96U
#define BUCKET_DISK_GEN_MAX 64U
#define kobj_attribute_write(n, fn) \ #define kobj_attribute_write(n, fn) \
static struct kobj_attribute ksysfs_##n = __ATTR(n, S_IWUSR, NULL, fn) static struct kobj_attribute ksysfs_##n = __ATTR(n, S_IWUSR, NULL, fn)
...@@ -899,11 +875,14 @@ void bch_submit_bbio(struct bio *, struct cache_set *, struct bkey *, unsigned); ...@@ -899,11 +875,14 @@ void bch_submit_bbio(struct bio *, struct cache_set *, struct bkey *, unsigned);
uint8_t bch_inc_gen(struct cache *, struct bucket *); uint8_t bch_inc_gen(struct cache *, struct bucket *);
void bch_rescale_priorities(struct cache_set *, int); void bch_rescale_priorities(struct cache_set *, int);
bool bch_bucket_add_unused(struct cache *, struct bucket *);
long bch_bucket_alloc(struct cache *, unsigned, bool); bool bch_can_invalidate_bucket(struct cache *, struct bucket *);
void __bch_invalidate_one_bucket(struct cache *, struct bucket *);
void __bch_bucket_free(struct cache *, struct bucket *);
void bch_bucket_free(struct cache_set *, struct bkey *); void bch_bucket_free(struct cache_set *, struct bkey *);
long bch_bucket_alloc(struct cache *, unsigned, bool);
int __bch_bucket_alloc_set(struct cache_set *, unsigned, int __bch_bucket_alloc_set(struct cache_set *, unsigned,
struct bkey *, int, bool); struct bkey *, int, bool);
int bch_bucket_alloc_set(struct cache_set *, unsigned, int bch_bucket_alloc_set(struct cache_set *, unsigned,
...@@ -954,13 +933,10 @@ int bch_open_buckets_alloc(struct cache_set *); ...@@ -954,13 +933,10 @@ int bch_open_buckets_alloc(struct cache_set *);
void bch_open_buckets_free(struct cache_set *); void bch_open_buckets_free(struct cache_set *);
int bch_cache_allocator_start(struct cache *ca); int bch_cache_allocator_start(struct cache *ca);
int bch_cache_allocator_init(struct cache *ca);
void bch_debug_exit(void); void bch_debug_exit(void);
int bch_debug_init(struct kobject *); int bch_debug_init(struct kobject *);
void bch_request_exit(void); void bch_request_exit(void);
int bch_request_init(void); int bch_request_init(void);
void bch_btree_exit(void);
int bch_btree_init(void);
#endif /* _BCACHE_H */ #endif /* _BCACHE_H */
...@@ -23,8 +23,8 @@ void bch_dump_bset(struct btree_keys *b, struct bset *i, unsigned set) ...@@ -23,8 +23,8 @@ void bch_dump_bset(struct btree_keys *b, struct bset *i, unsigned set)
for (k = i->start; k < bset_bkey_last(i); k = next) { for (k = i->start; k < bset_bkey_last(i); k = next) {
next = bkey_next(k); next = bkey_next(k);
printk(KERN_ERR "block %u key %li/%u: ", set, printk(KERN_ERR "block %u key %u/%u: ", set,
(uint64_t *) k - i->d, i->keys); (unsigned) ((u64 *) k - i->d), i->keys);
if (b->ops->key_dump) if (b->ops->key_dump)
b->ops->key_dump(b, k); b->ops->key_dump(b, k);
......
...@@ -478,6 +478,12 @@ static inline void bch_keylist_init(struct keylist *l) ...@@ -478,6 +478,12 @@ static inline void bch_keylist_init(struct keylist *l)
l->top_p = l->keys_p = l->inline_keys; l->top_p = l->keys_p = l->inline_keys;
} }
static inline void bch_keylist_init_single(struct keylist *l, struct bkey *k)
{
l->keys = k;
l->top = bkey_next(k);
}
static inline void bch_keylist_push(struct keylist *l) static inline void bch_keylist_push(struct keylist *l)
{ {
l->top = bkey_next(l->top); l->top = bkey_next(l->top);
......
This diff is collapsed.
...@@ -127,6 +127,8 @@ struct btree { ...@@ -127,6 +127,8 @@ struct btree {
struct cache_set *c; struct cache_set *c;
struct btree *parent; struct btree *parent;
struct mutex write_lock;
unsigned long flags; unsigned long flags;
uint16_t written; /* would be nice to kill */ uint16_t written; /* would be nice to kill */
uint8_t level; uint8_t level;
...@@ -236,11 +238,13 @@ static inline void rw_unlock(bool w, struct btree *b) ...@@ -236,11 +238,13 @@ static inline void rw_unlock(bool w, struct btree *b)
} }
void bch_btree_node_read_done(struct btree *); void bch_btree_node_read_done(struct btree *);
void __bch_btree_node_write(struct btree *, struct closure *);
void bch_btree_node_write(struct btree *, struct closure *); void bch_btree_node_write(struct btree *, struct closure *);
void bch_btree_set_root(struct btree *); void bch_btree_set_root(struct btree *);
struct btree *bch_btree_node_alloc(struct cache_set *, int, bool); struct btree *bch_btree_node_alloc(struct cache_set *, struct btree_op *, int);
struct btree *bch_btree_node_get(struct cache_set *, struct bkey *, int, bool); struct btree *bch_btree_node_get(struct cache_set *, struct btree_op *,
struct bkey *, int, bool);
int bch_btree_insert_check_key(struct btree *, struct btree_op *, int bch_btree_insert_check_key(struct btree *, struct btree_op *,
struct bkey *); struct bkey *);
...@@ -248,10 +252,10 @@ int bch_btree_insert(struct cache_set *, struct keylist *, ...@@ -248,10 +252,10 @@ int bch_btree_insert(struct cache_set *, struct keylist *,
atomic_t *, struct bkey *); atomic_t *, struct bkey *);
int bch_gc_thread_start(struct cache_set *); int bch_gc_thread_start(struct cache_set *);
size_t bch_btree_gc_finish(struct cache_set *); void bch_initial_gc_finish(struct cache_set *);
void bch_moving_gc(struct cache_set *); void bch_moving_gc(struct cache_set *);
int bch_btree_check(struct cache_set *); int bch_btree_check(struct cache_set *);
uint8_t __bch_btree_mark_key(struct cache_set *, int, struct bkey *); void bch_initial_mark_key(struct cache_set *, int, struct bkey *);
static inline void wake_up_gc(struct cache_set *c) static inline void wake_up_gc(struct cache_set *c)
{ {
......
...@@ -194,9 +194,9 @@ static bool btree_ptr_bad_expensive(struct btree *b, const struct bkey *k) ...@@ -194,9 +194,9 @@ static bool btree_ptr_bad_expensive(struct btree *b, const struct bkey *k)
mutex_unlock(&b->c->bucket_lock); mutex_unlock(&b->c->bucket_lock);
bch_extent_to_text(buf, sizeof(buf), k); bch_extent_to_text(buf, sizeof(buf), k);
btree_bug(b, btree_bug(b,
"inconsistent btree pointer %s: bucket %zi pin %i prio %i gen %i last_gc %i mark %llu gc_gen %i", "inconsistent btree pointer %s: bucket %zi pin %i prio %i gen %i last_gc %i mark %llu",
buf, PTR_BUCKET_NR(b->c, k, i), atomic_read(&g->pin), buf, PTR_BUCKET_NR(b->c, k, i), atomic_read(&g->pin),
g->prio, g->gen, g->last_gc, GC_MARK(g), g->gc_gen); g->prio, g->gen, g->last_gc, GC_MARK(g));
return true; return true;
} }
...@@ -308,6 +308,16 @@ static struct bkey *bch_extent_sort_fixup(struct btree_iter *iter, ...@@ -308,6 +308,16 @@ static struct bkey *bch_extent_sort_fixup(struct btree_iter *iter,
return NULL; return NULL;
} }
static void bch_subtract_dirty(struct bkey *k,
struct cache_set *c,
uint64_t offset,
int sectors)
{
if (KEY_DIRTY(k))
bcache_dev_sectors_dirty_add(c, KEY_INODE(k),
offset, -sectors);
}
static bool bch_extent_insert_fixup(struct btree_keys *b, static bool bch_extent_insert_fixup(struct btree_keys *b,
struct bkey *insert, struct bkey *insert,
struct btree_iter *iter, struct btree_iter *iter,
...@@ -315,13 +325,6 @@ static bool bch_extent_insert_fixup(struct btree_keys *b, ...@@ -315,13 +325,6 @@ static bool bch_extent_insert_fixup(struct btree_keys *b,
{ {
struct cache_set *c = container_of(b, struct btree, keys)->c; struct cache_set *c = container_of(b, struct btree, keys)->c;
void subtract_dirty(struct bkey *k, uint64_t offset, int sectors)
{
if (KEY_DIRTY(k))
bcache_dev_sectors_dirty_add(c, KEY_INODE(k),
offset, -sectors);
}
uint64_t old_offset; uint64_t old_offset;
unsigned old_size, sectors_found = 0; unsigned old_size, sectors_found = 0;
...@@ -398,7 +401,8 @@ static bool bch_extent_insert_fixup(struct btree_keys *b, ...@@ -398,7 +401,8 @@ static bool bch_extent_insert_fixup(struct btree_keys *b,
struct bkey *top; struct bkey *top;
subtract_dirty(k, KEY_START(insert), KEY_SIZE(insert)); bch_subtract_dirty(k, c, KEY_START(insert),
KEY_SIZE(insert));
if (bkey_written(b, k)) { if (bkey_written(b, k)) {
/* /*
...@@ -448,7 +452,7 @@ static bool bch_extent_insert_fixup(struct btree_keys *b, ...@@ -448,7 +452,7 @@ static bool bch_extent_insert_fixup(struct btree_keys *b,
} }
} }
subtract_dirty(k, old_offset, old_size - KEY_SIZE(k)); bch_subtract_dirty(k, c, old_offset, old_size - KEY_SIZE(k));
} }
check_failed: check_failed:
...@@ -499,9 +503,9 @@ static bool bch_extent_bad_expensive(struct btree *b, const struct bkey *k, ...@@ -499,9 +503,9 @@ static bool bch_extent_bad_expensive(struct btree *b, const struct bkey *k,
if (mutex_trylock(&b->c->bucket_lock)) { if (mutex_trylock(&b->c->bucket_lock)) {
if (b->c->gc_mark_valid && if (b->c->gc_mark_valid &&
((GC_MARK(g) != GC_MARK_DIRTY && (!GC_MARK(g) ||
KEY_DIRTY(k)) || GC_MARK(g) == GC_MARK_METADATA ||
GC_MARK(g) == GC_MARK_METADATA)) (GC_MARK(g) != GC_MARK_DIRTY && KEY_DIRTY(k))))
goto err; goto err;
if (g->prio == BTREE_PRIO) if (g->prio == BTREE_PRIO)
...@@ -515,9 +519,9 @@ static bool bch_extent_bad_expensive(struct btree *b, const struct bkey *k, ...@@ -515,9 +519,9 @@ static bool bch_extent_bad_expensive(struct btree *b, const struct bkey *k,
mutex_unlock(&b->c->bucket_lock); mutex_unlock(&b->c->bucket_lock);
bch_extent_to_text(buf, sizeof(buf), k); bch_extent_to_text(buf, sizeof(buf), k);
btree_bug(b, btree_bug(b,
"inconsistent extent pointer %s:\nbucket %zu pin %i prio %i gen %i last_gc %i mark %llu gc_gen %i", "inconsistent extent pointer %s:\nbucket %zu pin %i prio %i gen %i last_gc %i mark %llu",
buf, PTR_BUCKET_NR(b->c, k, ptr), atomic_read(&g->pin), buf, PTR_BUCKET_NR(b->c, k, ptr), atomic_read(&g->pin),
g->prio, g->gen, g->last_gc, GC_MARK(g), g->gc_gen); g->prio, g->gen, g->last_gc, GC_MARK(g));
return true; return true;
} }
......
...@@ -237,8 +237,14 @@ int bch_journal_read(struct cache_set *c, struct list_head *list) ...@@ -237,8 +237,14 @@ int bch_journal_read(struct cache_set *c, struct list_head *list)
for (i = 0; i < ca->sb.njournal_buckets; i++) for (i = 0; i < ca->sb.njournal_buckets; i++)
if (ja->seq[i] > seq) { if (ja->seq[i] > seq) {
seq = ja->seq[i]; seq = ja->seq[i];
ja->cur_idx = ja->discard_idx = /*
ja->last_idx = i; * When journal_reclaim() goes to allocate for
* the first time, it'll use the bucket after
* ja->cur_idx
*/
ja->cur_idx = i;
ja->last_idx = ja->discard_idx = (i + 1) %
ca->sb.njournal_buckets;
} }
} }
...@@ -288,16 +294,11 @@ void bch_journal_mark(struct cache_set *c, struct list_head *list) ...@@ -288,16 +294,11 @@ void bch_journal_mark(struct cache_set *c, struct list_head *list)
k = bkey_next(k)) { k = bkey_next(k)) {
unsigned j; unsigned j;
for (j = 0; j < KEY_PTRS(k); j++) { for (j = 0; j < KEY_PTRS(k); j++)
struct bucket *g = PTR_BUCKET(c, k, j); if (ptr_available(c, k, j))
atomic_inc(&g->pin); atomic_inc(&PTR_BUCKET(c, k, j)->pin);
if (g->prio == BTREE_PRIO &&
!ptr_stale(c, k, j))
g->prio = INITIAL_PRIO;
}
__bch_btree_mark_key(c, 0, k); bch_initial_mark_key(c, 0, k);
} }
} }
} }
...@@ -312,8 +313,6 @@ int bch_journal_replay(struct cache_set *s, struct list_head *list) ...@@ -312,8 +313,6 @@ int bch_journal_replay(struct cache_set *s, struct list_head *list)
uint64_t start = i->j.last_seq, end = i->j.seq, n = start; uint64_t start = i->j.last_seq, end = i->j.seq, n = start;
struct keylist keylist; struct keylist keylist;
bch_keylist_init(&keylist);
list_for_each_entry(i, list, list) { list_for_each_entry(i, list, list) {
BUG_ON(i->pin && atomic_read(i->pin) != 1); BUG_ON(i->pin && atomic_read(i->pin) != 1);
...@@ -326,8 +325,7 @@ int bch_journal_replay(struct cache_set *s, struct list_head *list) ...@@ -326,8 +325,7 @@ int bch_journal_replay(struct cache_set *s, struct list_head *list)
k = bkey_next(k)) { k = bkey_next(k)) {
trace_bcache_journal_replay_key(k); trace_bcache_journal_replay_key(k);
bkey_copy(keylist.top, k); bch_keylist_init_single(&keylist, k);
bch_keylist_push(&keylist);
ret = bch_btree_insert(s, &keylist, i->pin, NULL); ret = bch_btree_insert(s, &keylist, i->pin, NULL);
if (ret) if (ret)
...@@ -383,16 +381,15 @@ static void btree_flush_write(struct cache_set *c) ...@@ -383,16 +381,15 @@ static void btree_flush_write(struct cache_set *c)
b = best; b = best;
if (b) { if (b) {
rw_lock(true, b, b->level); mutex_lock(&b->write_lock);
if (!btree_current_write(b)->journal) { if (!btree_current_write(b)->journal) {
rw_unlock(true, b); mutex_unlock(&b->write_lock);
/* We raced */ /* We raced */
goto retry; goto retry;
} }
bch_btree_node_write(b, NULL); __bch_btree_node_write(b, NULL);
rw_unlock(true, b); mutex_unlock(&b->write_lock);
} }
} }
...@@ -536,6 +533,7 @@ void bch_journal_next(struct journal *j) ...@@ -536,6 +533,7 @@ void bch_journal_next(struct journal *j)
atomic_set(&fifo_back(&j->pin), 1); atomic_set(&fifo_back(&j->pin), 1);
j->cur->data->seq = ++j->seq; j->cur->data->seq = ++j->seq;
j->cur->dirty = false;
j->cur->need_write = false; j->cur->need_write = false;
j->cur->data->keys = 0; j->cur->data->keys = 0;
...@@ -731,7 +729,10 @@ static void journal_write_work(struct work_struct *work) ...@@ -731,7 +729,10 @@ static void journal_write_work(struct work_struct *work)
struct cache_set, struct cache_set,
journal.work); journal.work);
spin_lock(&c->journal.lock); spin_lock(&c->journal.lock);
if (c->journal.cur->dirty)
journal_try_write(c); journal_try_write(c);
else
spin_unlock(&c->journal.lock);
} }
/* /*
...@@ -761,7 +762,8 @@ atomic_t *bch_journal(struct cache_set *c, ...@@ -761,7 +762,8 @@ atomic_t *bch_journal(struct cache_set *c,
if (parent) { if (parent) {
closure_wait(&w->wait, parent); closure_wait(&w->wait, parent);
journal_try_write(c); journal_try_write(c);
} else if (!w->need_write) { } else if (!w->dirty) {
w->dirty = true;
schedule_delayed_work(&c->journal.work, schedule_delayed_work(&c->journal.work,
msecs_to_jiffies(c->journal_delay_ms)); msecs_to_jiffies(c->journal_delay_ms));
spin_unlock(&c->journal.lock); spin_unlock(&c->journal.lock);
......
...@@ -95,6 +95,7 @@ struct journal_write { ...@@ -95,6 +95,7 @@ struct journal_write {
struct cache_set *c; struct cache_set *c;
struct closure_waitlist wait; struct closure_waitlist wait;
bool dirty;
bool need_write; bool need_write;
}; };
......
...@@ -24,12 +24,10 @@ static bool moving_pred(struct keybuf *buf, struct bkey *k) ...@@ -24,12 +24,10 @@ static bool moving_pred(struct keybuf *buf, struct bkey *k)
moving_gc_keys); moving_gc_keys);
unsigned i; unsigned i;
for (i = 0; i < KEY_PTRS(k); i++) { for (i = 0; i < KEY_PTRS(k); i++)
struct bucket *g = PTR_BUCKET(c, k, i); if (ptr_available(c, k, i) &&
GC_MOVE(PTR_BUCKET(c, k, i)))
if (GC_MOVE(g))
return true; return true;
}
return false; return false;
} }
...@@ -115,7 +113,7 @@ static void write_moving(struct closure *cl) ...@@ -115,7 +113,7 @@ static void write_moving(struct closure *cl)
closure_call(&op->cl, bch_data_insert, NULL, cl); closure_call(&op->cl, bch_data_insert, NULL, cl);
} }
continue_at(cl, write_moving_finish, system_wq); continue_at(cl, write_moving_finish, op->wq);
} }
static void read_moving_submit(struct closure *cl) static void read_moving_submit(struct closure *cl)
...@@ -125,7 +123,7 @@ static void read_moving_submit(struct closure *cl) ...@@ -125,7 +123,7 @@ static void read_moving_submit(struct closure *cl)
bch_submit_bbio(bio, io->op.c, &io->w->key, 0); bch_submit_bbio(bio, io->op.c, &io->w->key, 0);
continue_at(cl, write_moving, system_wq); continue_at(cl, write_moving, io->op.wq);
} }
static void read_moving(struct cache_set *c) static void read_moving(struct cache_set *c)
...@@ -160,6 +158,7 @@ static void read_moving(struct cache_set *c) ...@@ -160,6 +158,7 @@ static void read_moving(struct cache_set *c)
io->w = w; io->w = w;
io->op.inode = KEY_INODE(&w->key); io->op.inode = KEY_INODE(&w->key);
io->op.c = c; io->op.c = c;
io->op.wq = c->moving_gc_wq;
moving_init(io); moving_init(io);
bio = &io->bio.bio; bio = &io->bio.bio;
...@@ -216,7 +215,10 @@ void bch_moving_gc(struct cache_set *c) ...@@ -216,7 +215,10 @@ void bch_moving_gc(struct cache_set *c)
ca->heap.used = 0; ca->heap.used = 0;
for_each_bucket(b, ca) { for_each_bucket(b, ca) {
if (!GC_SECTORS_USED(b)) if (GC_MARK(b) == GC_MARK_METADATA ||
!GC_SECTORS_USED(b) ||
GC_SECTORS_USED(b) == ca->sb.bucket_size ||
atomic_read(&b->pin))
continue; continue;
if (!heap_full(&ca->heap)) { if (!heap_full(&ca->heap)) {
......
...@@ -12,11 +12,9 @@ ...@@ -12,11 +12,9 @@
#include "request.h" #include "request.h"
#include "writeback.h" #include "writeback.h"
#include <linux/cgroup.h>
#include <linux/module.h> #include <linux/module.h>
#include <linux/hash.h> #include <linux/hash.h>
#include <linux/random.h> #include <linux/random.h>
#include "blk-cgroup.h"
#include <trace/events/bcache.h> #include <trace/events/bcache.h>
...@@ -27,171 +25,13 @@ struct kmem_cache *bch_search_cache; ...@@ -27,171 +25,13 @@ struct kmem_cache *bch_search_cache;
static void bch_data_insert_start(struct closure *); static void bch_data_insert_start(struct closure *);
/* Cgroup interface */
#ifdef CONFIG_CGROUP_BCACHE
static struct bch_cgroup bcache_default_cgroup = { .cache_mode = -1 };
static struct bch_cgroup *cgroup_to_bcache(struct cgroup *cgroup)
{
struct cgroup_subsys_state *css;
return cgroup &&
(css = cgroup_subsys_state(cgroup, bcache_subsys_id))
? container_of(css, struct bch_cgroup, css)
: &bcache_default_cgroup;
}
struct bch_cgroup *bch_bio_to_cgroup(struct bio *bio)
{
struct cgroup_subsys_state *css = bio->bi_css
? cgroup_subsys_state(bio->bi_css->cgroup, bcache_subsys_id)
: task_subsys_state(current, bcache_subsys_id);
return css
? container_of(css, struct bch_cgroup, css)
: &bcache_default_cgroup;
}
static ssize_t cache_mode_read(struct cgroup *cgrp, struct cftype *cft,
struct file *file,
char __user *buf, size_t nbytes, loff_t *ppos)
{
char tmp[1024];
int len = bch_snprint_string_list(tmp, PAGE_SIZE, bch_cache_modes,
cgroup_to_bcache(cgrp)->cache_mode + 1);
if (len < 0)
return len;
return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
}
static int cache_mode_write(struct cgroup *cgrp, struct cftype *cft,
const char *buf)
{
int v = bch_read_string_list(buf, bch_cache_modes);
if (v < 0)
return v;
cgroup_to_bcache(cgrp)->cache_mode = v - 1;
return 0;
}
static u64 bch_verify_read(struct cgroup *cgrp, struct cftype *cft)
{
return cgroup_to_bcache(cgrp)->verify;
}
static int bch_verify_write(struct cgroup *cgrp, struct cftype *cft, u64 val)
{
cgroup_to_bcache(cgrp)->verify = val;
return 0;
}
static u64 bch_cache_hits_read(struct cgroup *cgrp, struct cftype *cft)
{
struct bch_cgroup *bcachecg = cgroup_to_bcache(cgrp);
return atomic_read(&bcachecg->stats.cache_hits);
}
static u64 bch_cache_misses_read(struct cgroup *cgrp, struct cftype *cft)
{
struct bch_cgroup *bcachecg = cgroup_to_bcache(cgrp);
return atomic_read(&bcachecg->stats.cache_misses);
}
static u64 bch_cache_bypass_hits_read(struct cgroup *cgrp,
struct cftype *cft)
{
struct bch_cgroup *bcachecg = cgroup_to_bcache(cgrp);
return atomic_read(&bcachecg->stats.cache_bypass_hits);
}
static u64 bch_cache_bypass_misses_read(struct cgroup *cgrp,
struct cftype *cft)
{
struct bch_cgroup *bcachecg = cgroup_to_bcache(cgrp);
return atomic_read(&bcachecg->stats.cache_bypass_misses);
}
static struct cftype bch_files[] = {
{
.name = "cache_mode",
.read = cache_mode_read,
.write_string = cache_mode_write,
},
{
.name = "verify",
.read_u64 = bch_verify_read,
.write_u64 = bch_verify_write,
},
{
.name = "cache_hits",
.read_u64 = bch_cache_hits_read,
},
{
.name = "cache_misses",
.read_u64 = bch_cache_misses_read,
},
{
.name = "cache_bypass_hits",
.read_u64 = bch_cache_bypass_hits_read,
},
{
.name = "cache_bypass_misses",
.read_u64 = bch_cache_bypass_misses_read,
},
{ } /* terminate */
};
static void init_bch_cgroup(struct bch_cgroup *cg)
{
cg->cache_mode = -1;
}
static struct cgroup_subsys_state *bcachecg_create(struct cgroup *cgroup)
{
struct bch_cgroup *cg;
cg = kzalloc(sizeof(*cg), GFP_KERNEL);
if (!cg)
return ERR_PTR(-ENOMEM);
init_bch_cgroup(cg);
return &cg->css;
}
static void bcachecg_destroy(struct cgroup *cgroup)
{
struct bch_cgroup *cg = cgroup_to_bcache(cgroup);
kfree(cg);
}
struct cgroup_subsys bcache_subsys = {
.create = bcachecg_create,
.destroy = bcachecg_destroy,
.subsys_id = bcache_subsys_id,
.name = "bcache",
.module = THIS_MODULE,
};
EXPORT_SYMBOL_GPL(bcache_subsys);
#endif
static unsigned cache_mode(struct cached_dev *dc, struct bio *bio) static unsigned cache_mode(struct cached_dev *dc, struct bio *bio)
{ {
#ifdef CONFIG_CGROUP_BCACHE
int r = bch_bio_to_cgroup(bio)->cache_mode;
if (r >= 0)
return r;
#endif
return BDEV_CACHE_MODE(&dc->sb); return BDEV_CACHE_MODE(&dc->sb);
} }
static bool verify(struct cached_dev *dc, struct bio *bio) static bool verify(struct cached_dev *dc, struct bio *bio)
{ {
#ifdef CONFIG_CGROUP_BCACHE
if (bch_bio_to_cgroup(bio)->verify)
return true;
#endif
return dc->verify; return dc->verify;
} }
...@@ -248,7 +88,7 @@ static void bch_data_insert_keys(struct closure *cl) ...@@ -248,7 +88,7 @@ static void bch_data_insert_keys(struct closure *cl)
atomic_dec_bug(journal_ref); atomic_dec_bug(journal_ref);
if (!op->insert_data_done) if (!op->insert_data_done)
continue_at(cl, bch_data_insert_start, bcache_wq); continue_at(cl, bch_data_insert_start, op->wq);
bch_keylist_free(&op->insert_keys); bch_keylist_free(&op->insert_keys);
closure_return(cl); closure_return(cl);
...@@ -297,7 +137,7 @@ static void bch_data_invalidate(struct closure *cl) ...@@ -297,7 +137,7 @@ static void bch_data_invalidate(struct closure *cl)
op->insert_data_done = true; op->insert_data_done = true;
bio_put(bio); bio_put(bio);
out: out:
continue_at(cl, bch_data_insert_keys, bcache_wq); continue_at(cl, bch_data_insert_keys, op->wq);
} }
static void bch_data_insert_error(struct closure *cl) static void bch_data_insert_error(struct closure *cl)
...@@ -340,7 +180,7 @@ static void bch_data_insert_endio(struct bio *bio, int error) ...@@ -340,7 +180,7 @@ static void bch_data_insert_endio(struct bio *bio, int error)
if (op->writeback) if (op->writeback)
op->error = error; op->error = error;
else if (!op->replace) else if (!op->replace)
set_closure_fn(cl, bch_data_insert_error, bcache_wq); set_closure_fn(cl, bch_data_insert_error, op->wq);
else else
set_closure_fn(cl, NULL, NULL); set_closure_fn(cl, NULL, NULL);
} }
...@@ -376,7 +216,7 @@ static void bch_data_insert_start(struct closure *cl) ...@@ -376,7 +216,7 @@ static void bch_data_insert_start(struct closure *cl)
if (bch_keylist_realloc(&op->insert_keys, if (bch_keylist_realloc(&op->insert_keys,
3 + (op->csum ? 1 : 0), 3 + (op->csum ? 1 : 0),
op->c)) op->c))
continue_at(cl, bch_data_insert_keys, bcache_wq); continue_at(cl, bch_data_insert_keys, op->wq);
k = op->insert_keys.top; k = op->insert_keys.top;
bkey_init(k); bkey_init(k);
...@@ -413,7 +253,7 @@ static void bch_data_insert_start(struct closure *cl) ...@@ -413,7 +253,7 @@ static void bch_data_insert_start(struct closure *cl)
} while (n != bio); } while (n != bio);
op->insert_data_done = true; op->insert_data_done = true;
continue_at(cl, bch_data_insert_keys, bcache_wq); continue_at(cl, bch_data_insert_keys, op->wq);
err: err:
/* bch_alloc_sectors() blocks if s->writeback = true */ /* bch_alloc_sectors() blocks if s->writeback = true */
BUG_ON(op->writeback); BUG_ON(op->writeback);
...@@ -442,7 +282,7 @@ static void bch_data_insert_start(struct closure *cl) ...@@ -442,7 +282,7 @@ static void bch_data_insert_start(struct closure *cl)
bio_put(bio); bio_put(bio);
if (!bch_keylist_empty(&op->insert_keys)) if (!bch_keylist_empty(&op->insert_keys))
continue_at(cl, bch_data_insert_keys, bcache_wq); continue_at(cl, bch_data_insert_keys, op->wq);
else else
closure_return(cl); closure_return(cl);
} }
...@@ -824,6 +664,7 @@ static inline struct search *search_alloc(struct bio *bio, ...@@ -824,6 +664,7 @@ static inline struct search *search_alloc(struct bio *bio,
s->iop.error = 0; s->iop.error = 0;
s->iop.flags = 0; s->iop.flags = 0;
s->iop.flush_journal = (bio->bi_rw & (REQ_FLUSH|REQ_FUA)) != 0; s->iop.flush_journal = (bio->bi_rw & (REQ_FLUSH|REQ_FUA)) != 0;
s->iop.wq = bcache_wq;
return s; return s;
} }
...@@ -1203,22 +1044,13 @@ void bch_cached_dev_request_init(struct cached_dev *dc) ...@@ -1203,22 +1044,13 @@ void bch_cached_dev_request_init(struct cached_dev *dc)
static int flash_dev_cache_miss(struct btree *b, struct search *s, static int flash_dev_cache_miss(struct btree *b, struct search *s,
struct bio *bio, unsigned sectors) struct bio *bio, unsigned sectors)
{ {
struct bio_vec bv; unsigned bytes = min(sectors, bio_sectors(bio)) << 9;
struct bvec_iter iter;
/* Zero fill bio */
bio_for_each_segment(bv, bio, iter) { swap(bio->bi_iter.bi_size, bytes);
unsigned j = min(bv.bv_len >> 9, sectors); zero_fill_bio(bio);
swap(bio->bi_iter.bi_size, bytes);
void *p = kmap(bv.bv_page);
memset(p + bv.bv_offset, 0, j << 9);
kunmap(bv.bv_page);
sectors -= j;
}
bio_advance(bio, min(sectors << 9, bio->bi_iter.bi_size)); bio_advance(bio, bytes);
if (!bio->bi_iter.bi_size) if (!bio->bi_iter.bi_size)
return MAP_DONE; return MAP_DONE;
...@@ -1313,9 +1145,6 @@ void bch_flash_dev_request_init(struct bcache_device *d) ...@@ -1313,9 +1145,6 @@ void bch_flash_dev_request_init(struct bcache_device *d)
void bch_request_exit(void) void bch_request_exit(void)
{ {
#ifdef CONFIG_CGROUP_BCACHE
cgroup_unload_subsys(&bcache_subsys);
#endif
if (bch_search_cache) if (bch_search_cache)
kmem_cache_destroy(bch_search_cache); kmem_cache_destroy(bch_search_cache);
} }
...@@ -1326,11 +1155,5 @@ int __init bch_request_init(void) ...@@ -1326,11 +1155,5 @@ int __init bch_request_init(void)
if (!bch_search_cache) if (!bch_search_cache)
return -ENOMEM; return -ENOMEM;
#ifdef CONFIG_CGROUP_BCACHE
cgroup_load_subsys(&bcache_subsys);
init_bch_cgroup(&bcache_default_cgroup);
cgroup_add_cftypes(&bcache_subsys, bch_files);
#endif
return 0; return 0;
} }
#ifndef _BCACHE_REQUEST_H_ #ifndef _BCACHE_REQUEST_H_
#define _BCACHE_REQUEST_H_ #define _BCACHE_REQUEST_H_
#include <linux/cgroup.h>
struct data_insert_op { struct data_insert_op {
struct closure cl; struct closure cl;
struct cache_set *c; struct cache_set *c;
struct bio *bio; struct bio *bio;
struct workqueue_struct *wq;
unsigned inode; unsigned inode;
uint16_t write_point; uint16_t write_point;
...@@ -41,20 +40,4 @@ void bch_flash_dev_request_init(struct bcache_device *d); ...@@ -41,20 +40,4 @@ void bch_flash_dev_request_init(struct bcache_device *d);
extern struct kmem_cache *bch_search_cache, *bch_passthrough_cache; extern struct kmem_cache *bch_search_cache, *bch_passthrough_cache;
struct bch_cgroup {
#ifdef CONFIG_CGROUP_BCACHE
struct cgroup_subsys_state css;
#endif
/*
* We subtract one from the index into bch_cache_modes[], so that
* default == -1; this makes it so the rest match up with d->cache_mode,
* and we use d->cache_mode if cgrp->cache_mode < 0
*/
short cache_mode;
bool verify;
struct cache_stat_collector stats;
};
struct bch_cgroup *bch_bio_to_cgroup(struct bio *bio);
#endif /* _BCACHE_REQUEST_H_ */ #endif /* _BCACHE_REQUEST_H_ */
...@@ -201,9 +201,6 @@ void bch_mark_cache_accounting(struct cache_set *c, struct bcache_device *d, ...@@ -201,9 +201,6 @@ void bch_mark_cache_accounting(struct cache_set *c, struct bcache_device *d,
struct cached_dev *dc = container_of(d, struct cached_dev, disk); struct cached_dev *dc = container_of(d, struct cached_dev, disk);
mark_cache_stats(&dc->accounting.collector, hit, bypass); mark_cache_stats(&dc->accounting.collector, hit, bypass);
mark_cache_stats(&c->accounting.collector, hit, bypass); mark_cache_stats(&c->accounting.collector, hit, bypass);
#ifdef CONFIG_CGROUP_BCACHE
mark_cache_stats(&(bch_bio_to_cgroup(s->orig_bio)->stats), hit, bypass);
#endif
} }
void bch_mark_cache_readahead(struct cache_set *c, struct bcache_device *d) void bch_mark_cache_readahead(struct cache_set *c, struct bcache_device *d)
......
...@@ -541,9 +541,6 @@ static void prio_io(struct cache *ca, uint64_t bucket, unsigned long rw) ...@@ -541,9 +541,6 @@ static void prio_io(struct cache *ca, uint64_t bucket, unsigned long rw)
closure_sync(cl); closure_sync(cl);
} }
#define buckets_free(c) "free %zu, free_inc %zu, unused %zu", \
fifo_used(&c->free), fifo_used(&c->free_inc), fifo_used(&c->unused)
void bch_prio_write(struct cache *ca) void bch_prio_write(struct cache *ca)
{ {
int i; int i;
...@@ -554,10 +551,6 @@ void bch_prio_write(struct cache *ca) ...@@ -554,10 +551,6 @@ void bch_prio_write(struct cache *ca)
lockdep_assert_held(&ca->set->bucket_lock); lockdep_assert_held(&ca->set->bucket_lock);
for (b = ca->buckets;
b < ca->buckets + ca->sb.nbuckets; b++)
b->disk_gen = b->gen;
ca->disk_buckets->seq++; ca->disk_buckets->seq++;
atomic_long_add(ca->sb.bucket_size * prio_buckets(ca), atomic_long_add(ca->sb.bucket_size * prio_buckets(ca),
...@@ -601,14 +594,17 @@ void bch_prio_write(struct cache *ca) ...@@ -601,14 +594,17 @@ void bch_prio_write(struct cache *ca)
mutex_lock(&ca->set->bucket_lock); mutex_lock(&ca->set->bucket_lock);
ca->need_save_prio = 0;
/* /*
* Don't want the old priorities to get garbage collected until after we * Don't want the old priorities to get garbage collected until after we
* finish writing the new ones, and they're journalled * finish writing the new ones, and they're journalled
*/ */
for (i = 0; i < prio_buckets(ca); i++) for (i = 0; i < prio_buckets(ca); i++) {
if (ca->prio_last_buckets[i])
__bch_bucket_free(ca,
&ca->buckets[ca->prio_last_buckets[i]]);
ca->prio_last_buckets[i] = ca->prio_buckets[i]; ca->prio_last_buckets[i] = ca->prio_buckets[i];
}
} }
static void prio_read(struct cache *ca, uint64_t bucket) static void prio_read(struct cache *ca, uint64_t bucket)
...@@ -639,7 +635,7 @@ static void prio_read(struct cache *ca, uint64_t bucket) ...@@ -639,7 +635,7 @@ static void prio_read(struct cache *ca, uint64_t bucket)
} }
b->prio = le16_to_cpu(d->prio); b->prio = le16_to_cpu(d->prio);
b->gen = b->disk_gen = b->last_gc = b->gc_gen = d->gen; b->gen = b->last_gc = d->gen;
} }
} }
...@@ -843,6 +839,7 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size, ...@@ -843,6 +839,7 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size,
q->limits.max_segment_size = UINT_MAX; q->limits.max_segment_size = UINT_MAX;
q->limits.max_segments = BIO_MAX_PAGES; q->limits.max_segments = BIO_MAX_PAGES;
q->limits.max_discard_sectors = UINT_MAX; q->limits.max_discard_sectors = UINT_MAX;
q->limits.discard_granularity = 512;
q->limits.io_min = block_size; q->limits.io_min = block_size;
q->limits.logical_block_size = block_size; q->limits.logical_block_size = block_size;
q->limits.physical_block_size = block_size; q->limits.physical_block_size = block_size;
...@@ -1355,6 +1352,8 @@ static void cache_set_free(struct closure *cl) ...@@ -1355,6 +1352,8 @@ static void cache_set_free(struct closure *cl)
bch_bset_sort_state_free(&c->sort); bch_bset_sort_state_free(&c->sort);
free_pages((unsigned long) c->uuids, ilog2(bucket_pages(c))); free_pages((unsigned long) c->uuids, ilog2(bucket_pages(c)));
if (c->moving_gc_wq)
destroy_workqueue(c->moving_gc_wq);
if (c->bio_split) if (c->bio_split)
bioset_free(c->bio_split); bioset_free(c->bio_split);
if (c->fill_iter) if (c->fill_iter)
...@@ -1395,14 +1394,21 @@ static void cache_set_flush(struct closure *cl) ...@@ -1395,14 +1394,21 @@ static void cache_set_flush(struct closure *cl)
list_add(&c->root->list, &c->btree_cache); list_add(&c->root->list, &c->btree_cache);
/* Should skip this if we're unregistering because of an error */ /* Should skip this if we're unregistering because of an error */
list_for_each_entry(b, &c->btree_cache, list) list_for_each_entry(b, &c->btree_cache, list) {
mutex_lock(&b->write_lock);
if (btree_node_dirty(b)) if (btree_node_dirty(b))
bch_btree_node_write(b, NULL); __bch_btree_node_write(b, NULL);
mutex_unlock(&b->write_lock);
}
for_each_cache(ca, c, i) for_each_cache(ca, c, i)
if (ca->alloc_thread) if (ca->alloc_thread)
kthread_stop(ca->alloc_thread); kthread_stop(ca->alloc_thread);
cancel_delayed_work_sync(&c->journal.work);
/* flush last journal entry if needed */
c->journal.work.work.func(&c->journal.work.work);
closure_return(cl); closure_return(cl);
} }
...@@ -1485,14 +1491,13 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb) ...@@ -1485,14 +1491,13 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
sema_init(&c->sb_write_mutex, 1); sema_init(&c->sb_write_mutex, 1);
mutex_init(&c->bucket_lock); mutex_init(&c->bucket_lock);
init_waitqueue_head(&c->try_wait); init_waitqueue_head(&c->btree_cache_wait);
init_waitqueue_head(&c->bucket_wait); init_waitqueue_head(&c->bucket_wait);
sema_init(&c->uuid_write_mutex, 1); sema_init(&c->uuid_write_mutex, 1);
spin_lock_init(&c->btree_gc_time.lock); spin_lock_init(&c->btree_gc_time.lock);
spin_lock_init(&c->btree_split_time.lock); spin_lock_init(&c->btree_split_time.lock);
spin_lock_init(&c->btree_read_time.lock); spin_lock_init(&c->btree_read_time.lock);
spin_lock_init(&c->try_harder_time.lock);
bch_moving_init_cache_set(c); bch_moving_init_cache_set(c);
...@@ -1517,6 +1522,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb) ...@@ -1517,6 +1522,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
!(c->fill_iter = mempool_create_kmalloc_pool(1, iter_size)) || !(c->fill_iter = mempool_create_kmalloc_pool(1, iter_size)) ||
!(c->bio_split = bioset_create(4, offsetof(struct bbio, bio))) || !(c->bio_split = bioset_create(4, offsetof(struct bbio, bio))) ||
!(c->uuids = alloc_bucket_pages(GFP_KERNEL, c)) || !(c->uuids = alloc_bucket_pages(GFP_KERNEL, c)) ||
!(c->moving_gc_wq = create_workqueue("bcache_gc")) ||
bch_journal_alloc(c) || bch_journal_alloc(c) ||
bch_btree_cache_alloc(c) || bch_btree_cache_alloc(c) ||
bch_open_buckets_alloc(c) || bch_open_buckets_alloc(c) ||
...@@ -1580,7 +1586,7 @@ static void run_cache_set(struct cache_set *c) ...@@ -1580,7 +1586,7 @@ static void run_cache_set(struct cache_set *c)
goto err; goto err;
err = "error reading btree root"; err = "error reading btree root";
c->root = bch_btree_node_get(c, k, j->btree_level, true); c->root = bch_btree_node_get(c, NULL, k, j->btree_level, true);
if (IS_ERR_OR_NULL(c->root)) if (IS_ERR_OR_NULL(c->root))
goto err; goto err;
...@@ -1596,7 +1602,7 @@ static void run_cache_set(struct cache_set *c) ...@@ -1596,7 +1602,7 @@ static void run_cache_set(struct cache_set *c)
goto err; goto err;
bch_journal_mark(c, &journal); bch_journal_mark(c, &journal);
bch_btree_gc_finish(c); bch_initial_gc_finish(c);
pr_debug("btree_check() done"); pr_debug("btree_check() done");
/* /*
...@@ -1638,7 +1644,7 @@ static void run_cache_set(struct cache_set *c) ...@@ -1638,7 +1644,7 @@ static void run_cache_set(struct cache_set *c)
ca->sb.d[j] = ca->sb.first_bucket + j; ca->sb.d[j] = ca->sb.first_bucket + j;
} }
bch_btree_gc_finish(c); bch_initial_gc_finish(c);
err = "error starting allocator thread"; err = "error starting allocator thread";
for_each_cache(ca, c, i) for_each_cache(ca, c, i)
...@@ -1655,12 +1661,14 @@ static void run_cache_set(struct cache_set *c) ...@@ -1655,12 +1661,14 @@ static void run_cache_set(struct cache_set *c)
goto err; goto err;
err = "cannot allocate new btree root"; err = "cannot allocate new btree root";
c->root = bch_btree_node_alloc(c, 0, true); c->root = bch_btree_node_alloc(c, NULL, 0);
if (IS_ERR_OR_NULL(c->root)) if (IS_ERR_OR_NULL(c->root))
goto err; goto err;
mutex_lock(&c->root->write_lock);
bkey_copy_key(&c->root->key, &MAX_KEY); bkey_copy_key(&c->root->key, &MAX_KEY);
bch_btree_node_write(c->root, &cl); bch_btree_node_write(c->root, &cl);
mutex_unlock(&c->root->write_lock);
bch_btree_set_root(c->root); bch_btree_set_root(c->root);
rw_unlock(true, c->root); rw_unlock(true, c->root);
...@@ -1782,7 +1790,6 @@ void bch_cache_release(struct kobject *kobj) ...@@ -1782,7 +1790,6 @@ void bch_cache_release(struct kobject *kobj)
vfree(ca->buckets); vfree(ca->buckets);
free_heap(&ca->heap); free_heap(&ca->heap);
free_fifo(&ca->unused);
free_fifo(&ca->free_inc); free_fifo(&ca->free_inc);
for (i = 0; i < RESERVE_NR; i++) for (i = 0; i < RESERVE_NR; i++)
...@@ -1819,7 +1826,6 @@ static int cache_alloc(struct cache_sb *sb, struct cache *ca) ...@@ -1819,7 +1826,6 @@ static int cache_alloc(struct cache_sb *sb, struct cache *ca)
!init_fifo(&ca->free[RESERVE_MOVINGGC], free, GFP_KERNEL) || !init_fifo(&ca->free[RESERVE_MOVINGGC], free, GFP_KERNEL) ||
!init_fifo(&ca->free[RESERVE_NONE], free, GFP_KERNEL) || !init_fifo(&ca->free[RESERVE_NONE], free, GFP_KERNEL) ||
!init_fifo(&ca->free_inc, free << 2, GFP_KERNEL) || !init_fifo(&ca->free_inc, free << 2, GFP_KERNEL) ||
!init_fifo(&ca->unused, free << 2, GFP_KERNEL) ||
!init_heap(&ca->heap, free << 3, GFP_KERNEL) || !init_heap(&ca->heap, free << 3, GFP_KERNEL) ||
!(ca->buckets = vzalloc(sizeof(struct bucket) * !(ca->buckets = vzalloc(sizeof(struct bucket) *
ca->sb.nbuckets)) || ca->sb.nbuckets)) ||
...@@ -1834,13 +1840,7 @@ static int cache_alloc(struct cache_sb *sb, struct cache *ca) ...@@ -1834,13 +1840,7 @@ static int cache_alloc(struct cache_sb *sb, struct cache *ca)
for_each_bucket(b, ca) for_each_bucket(b, ca)
atomic_set(&b->pin, 0); atomic_set(&b->pin, 0);
if (bch_cache_allocator_init(ca))
goto err;
return 0; return 0;
err:
kobject_put(&ca->kobj);
return -ENOMEM;
} }
static void register_cache(struct cache_sb *sb, struct page *sb_page, static void register_cache(struct cache_sb *sb, struct page *sb_page,
...@@ -1869,7 +1869,10 @@ static void register_cache(struct cache_sb *sb, struct page *sb_page, ...@@ -1869,7 +1869,10 @@ static void register_cache(struct cache_sb *sb, struct page *sb_page,
if (kobject_add(&ca->kobj, &part_to_dev(bdev->bd_part)->kobj, "bcache")) if (kobject_add(&ca->kobj, &part_to_dev(bdev->bd_part)->kobj, "bcache"))
goto err; goto err;
mutex_lock(&bch_register_lock);
err = register_cache_set(ca); err = register_cache_set(ca);
mutex_unlock(&bch_register_lock);
if (err) if (err)
goto err; goto err;
...@@ -1931,8 +1934,6 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, ...@@ -1931,8 +1934,6 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
if (!try_module_get(THIS_MODULE)) if (!try_module_get(THIS_MODULE))
return -EBUSY; return -EBUSY;
mutex_lock(&bch_register_lock);
if (!(path = kstrndup(buffer, size, GFP_KERNEL)) || if (!(path = kstrndup(buffer, size, GFP_KERNEL)) ||
!(sb = kmalloc(sizeof(struct cache_sb), GFP_KERNEL))) !(sb = kmalloc(sizeof(struct cache_sb), GFP_KERNEL)))
goto err; goto err;
...@@ -1965,7 +1966,9 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, ...@@ -1965,7 +1966,9 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
if (!dc) if (!dc)
goto err_close; goto err_close;
mutex_lock(&bch_register_lock);
register_bdev(sb, sb_page, bdev, dc); register_bdev(sb, sb_page, bdev, dc);
mutex_unlock(&bch_register_lock);
} else { } else {
struct cache *ca = kzalloc(sizeof(*ca), GFP_KERNEL); struct cache *ca = kzalloc(sizeof(*ca), GFP_KERNEL);
if (!ca) if (!ca)
...@@ -1978,7 +1981,6 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, ...@@ -1978,7 +1981,6 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
put_page(sb_page); put_page(sb_page);
kfree(sb); kfree(sb);
kfree(path); kfree(path);
mutex_unlock(&bch_register_lock);
module_put(THIS_MODULE); module_put(THIS_MODULE);
return ret; return ret;
...@@ -2057,7 +2059,6 @@ static void bcache_exit(void) ...@@ -2057,7 +2059,6 @@ static void bcache_exit(void)
{ {
bch_debug_exit(); bch_debug_exit();
bch_request_exit(); bch_request_exit();
bch_btree_exit();
if (bcache_kobj) if (bcache_kobj)
kobject_put(bcache_kobj); kobject_put(bcache_kobj);
if (bcache_wq) if (bcache_wq)
...@@ -2087,7 +2088,6 @@ static int __init bcache_init(void) ...@@ -2087,7 +2088,6 @@ static int __init bcache_init(void)
if (!(bcache_wq = create_workqueue("bcache")) || if (!(bcache_wq = create_workqueue("bcache")) ||
!(bcache_kobj = kobject_create_and_add("bcache", fs_kobj)) || !(bcache_kobj = kobject_create_and_add("bcache", fs_kobj)) ||
sysfs_create_files(bcache_kobj, files) || sysfs_create_files(bcache_kobj, files) ||
bch_btree_init() ||
bch_request_init() || bch_request_init() ||
bch_debug_init(bcache_kobj)) bch_debug_init(bcache_kobj))
goto err; goto err;
......
...@@ -54,7 +54,6 @@ sysfs_time_stats_attribute(btree_gc, sec, ms); ...@@ -54,7 +54,6 @@ sysfs_time_stats_attribute(btree_gc, sec, ms);
sysfs_time_stats_attribute(btree_split, sec, us); sysfs_time_stats_attribute(btree_split, sec, us);
sysfs_time_stats_attribute(btree_sort, ms, us); sysfs_time_stats_attribute(btree_sort, ms, us);
sysfs_time_stats_attribute(btree_read, ms, us); sysfs_time_stats_attribute(btree_read, ms, us);
sysfs_time_stats_attribute(try_harder, ms, us);
read_attribute(btree_nodes); read_attribute(btree_nodes);
read_attribute(btree_used_percent); read_attribute(btree_used_percent);
...@@ -406,7 +405,7 @@ struct bset_stats_op { ...@@ -406,7 +405,7 @@ struct bset_stats_op {
struct bset_stats stats; struct bset_stats stats;
}; };
static int btree_bset_stats(struct btree_op *b_op, struct btree *b) static int bch_btree_bset_stats(struct btree_op *b_op, struct btree *b)
{ {
struct bset_stats_op *op = container_of(b_op, struct bset_stats_op, op); struct bset_stats_op *op = container_of(b_op, struct bset_stats_op, op);
...@@ -424,7 +423,7 @@ static int bch_bset_print_stats(struct cache_set *c, char *buf) ...@@ -424,7 +423,7 @@ static int bch_bset_print_stats(struct cache_set *c, char *buf)
memset(&op, 0, sizeof(op)); memset(&op, 0, sizeof(op));
bch_btree_op_init(&op.op, -1); bch_btree_op_init(&op.op, -1);
ret = bch_btree_map_nodes(&op.op, c, &ZERO_KEY, btree_bset_stats); ret = bch_btree_map_nodes(&op.op, c, &ZERO_KEY, bch_btree_bset_stats);
if (ret < 0) if (ret < 0)
return ret; return ret;
...@@ -442,10 +441,8 @@ static int bch_bset_print_stats(struct cache_set *c, char *buf) ...@@ -442,10 +441,8 @@ static int bch_bset_print_stats(struct cache_set *c, char *buf)
op.stats.floats, op.stats.failed); op.stats.floats, op.stats.failed);
} }
SHOW(__bch_cache_set) static unsigned bch_root_usage(struct cache_set *c)
{ {
unsigned root_usage(struct cache_set *c)
{
unsigned bytes = 0; unsigned bytes = 0;
struct bkey *k; struct bkey *k;
struct btree *b; struct btree *b;
...@@ -466,10 +463,10 @@ SHOW(__bch_cache_set) ...@@ -466,10 +463,10 @@ SHOW(__bch_cache_set)
rw_unlock(false, b); rw_unlock(false, b);
return (bytes * 100) / btree_bytes(c); return (bytes * 100) / btree_bytes(c);
} }
size_t cache_size(struct cache_set *c) static size_t bch_cache_size(struct cache_set *c)
{ {
size_t ret = 0; size_t ret = 0;
struct btree *b; struct btree *b;
...@@ -479,10 +476,10 @@ SHOW(__bch_cache_set) ...@@ -479,10 +476,10 @@ SHOW(__bch_cache_set)
mutex_unlock(&c->bucket_lock); mutex_unlock(&c->bucket_lock);
return ret; return ret;
} }
unsigned cache_max_chain(struct cache_set *c) static unsigned bch_cache_max_chain(struct cache_set *c)
{ {
unsigned ret = 0; unsigned ret = 0;
struct hlist_head *h; struct hlist_head *h;
...@@ -502,21 +499,23 @@ SHOW(__bch_cache_set) ...@@ -502,21 +499,23 @@ SHOW(__bch_cache_set)
mutex_unlock(&c->bucket_lock); mutex_unlock(&c->bucket_lock);
return ret; return ret;
} }
unsigned btree_used(struct cache_set *c) static unsigned bch_btree_used(struct cache_set *c)
{ {
return div64_u64(c->gc_stats.key_bytes * 100, return div64_u64(c->gc_stats.key_bytes * 100,
(c->gc_stats.nodes ?: 1) * btree_bytes(c)); (c->gc_stats.nodes ?: 1) * btree_bytes(c));
} }
unsigned average_key_size(struct cache_set *c) static unsigned bch_average_key_size(struct cache_set *c)
{ {
return c->gc_stats.nkeys return c->gc_stats.nkeys
? div64_u64(c->gc_stats.data, c->gc_stats.nkeys) ? div64_u64(c->gc_stats.data, c->gc_stats.nkeys)
: 0; : 0;
} }
SHOW(__bch_cache_set)
{
struct cache_set *c = container_of(kobj, struct cache_set, kobj); struct cache_set *c = container_of(kobj, struct cache_set, kobj);
sysfs_print(synchronous, CACHE_SYNC(&c->sb)); sysfs_print(synchronous, CACHE_SYNC(&c->sb));
...@@ -524,21 +523,20 @@ SHOW(__bch_cache_set) ...@@ -524,21 +523,20 @@ SHOW(__bch_cache_set)
sysfs_hprint(bucket_size, bucket_bytes(c)); sysfs_hprint(bucket_size, bucket_bytes(c));
sysfs_hprint(block_size, block_bytes(c)); sysfs_hprint(block_size, block_bytes(c));
sysfs_print(tree_depth, c->root->level); sysfs_print(tree_depth, c->root->level);
sysfs_print(root_usage_percent, root_usage(c)); sysfs_print(root_usage_percent, bch_root_usage(c));
sysfs_hprint(btree_cache_size, cache_size(c)); sysfs_hprint(btree_cache_size, bch_cache_size(c));
sysfs_print(btree_cache_max_chain, cache_max_chain(c)); sysfs_print(btree_cache_max_chain, bch_cache_max_chain(c));
sysfs_print(cache_available_percent, 100 - c->gc_stats.in_use); sysfs_print(cache_available_percent, 100 - c->gc_stats.in_use);
sysfs_print_time_stats(&c->btree_gc_time, btree_gc, sec, ms); sysfs_print_time_stats(&c->btree_gc_time, btree_gc, sec, ms);
sysfs_print_time_stats(&c->btree_split_time, btree_split, sec, us); sysfs_print_time_stats(&c->btree_split_time, btree_split, sec, us);
sysfs_print_time_stats(&c->sort.time, btree_sort, ms, us); sysfs_print_time_stats(&c->sort.time, btree_sort, ms, us);
sysfs_print_time_stats(&c->btree_read_time, btree_read, ms, us); sysfs_print_time_stats(&c->btree_read_time, btree_read, ms, us);
sysfs_print_time_stats(&c->try_harder_time, try_harder, ms, us);
sysfs_print(btree_used_percent, btree_used(c)); sysfs_print(btree_used_percent, bch_btree_used(c));
sysfs_print(btree_nodes, c->gc_stats.nodes); sysfs_print(btree_nodes, c->gc_stats.nodes);
sysfs_hprint(average_key_size, average_key_size(c)); sysfs_hprint(average_key_size, bch_average_key_size(c));
sysfs_print(cache_read_races, sysfs_print(cache_read_races,
atomic_long_read(&c->cache_read_races)); atomic_long_read(&c->cache_read_races));
...@@ -709,7 +707,6 @@ static struct attribute *bch_cache_set_internal_files[] = { ...@@ -709,7 +707,6 @@ static struct attribute *bch_cache_set_internal_files[] = {
sysfs_time_stats_attribute_list(btree_split, sec, us) sysfs_time_stats_attribute_list(btree_split, sec, us)
sysfs_time_stats_attribute_list(btree_sort, ms, us) sysfs_time_stats_attribute_list(btree_sort, ms, us)
sysfs_time_stats_attribute_list(btree_read, ms, us) sysfs_time_stats_attribute_list(btree_read, ms, us)
sysfs_time_stats_attribute_list(try_harder, ms, us)
&sysfs_btree_nodes, &sysfs_btree_nodes,
&sysfs_btree_used_percent, &sysfs_btree_used_percent,
...@@ -761,7 +758,9 @@ SHOW(__bch_cache) ...@@ -761,7 +758,9 @@ SHOW(__bch_cache)
int cmp(const void *l, const void *r) int cmp(const void *l, const void *r)
{ return *((uint16_t *) r) - *((uint16_t *) l); } { return *((uint16_t *) r) - *((uint16_t *) l); }
size_t n = ca->sb.nbuckets, i, unused, btree; struct bucket *b;
size_t n = ca->sb.nbuckets, i;
size_t unused = 0, available = 0, dirty = 0, meta = 0;
uint64_t sum = 0; uint64_t sum = 0;
/* Compute 31 quantiles */ /* Compute 31 quantiles */
uint16_t q[31], *p, *cached; uint16_t q[31], *p, *cached;
...@@ -772,6 +771,17 @@ SHOW(__bch_cache) ...@@ -772,6 +771,17 @@ SHOW(__bch_cache)
return -ENOMEM; return -ENOMEM;
mutex_lock(&ca->set->bucket_lock); mutex_lock(&ca->set->bucket_lock);
for_each_bucket(b, ca) {
if (!GC_SECTORS_USED(b))
unused++;
if (GC_MARK(b) == GC_MARK_RECLAIMABLE)
available++;
if (GC_MARK(b) == GC_MARK_DIRTY)
dirty++;
if (GC_MARK(b) == GC_MARK_METADATA)
meta++;
}
for (i = ca->sb.first_bucket; i < n; i++) for (i = ca->sb.first_bucket; i < n; i++)
p[i] = ca->buckets[i].prio; p[i] = ca->buckets[i].prio;
mutex_unlock(&ca->set->bucket_lock); mutex_unlock(&ca->set->bucket_lock);
...@@ -786,10 +796,7 @@ SHOW(__bch_cache) ...@@ -786,10 +796,7 @@ SHOW(__bch_cache)
while (cached < p + n && while (cached < p + n &&
*cached == BTREE_PRIO) *cached == BTREE_PRIO)
cached++; cached++, n--;
btree = cached - p;
n -= btree;
for (i = 0; i < n; i++) for (i = 0; i < n; i++)
sum += INITIAL_PRIO - cached[i]; sum += INITIAL_PRIO - cached[i];
...@@ -805,12 +812,16 @@ SHOW(__bch_cache) ...@@ -805,12 +812,16 @@ SHOW(__bch_cache)
ret = scnprintf(buf, PAGE_SIZE, ret = scnprintf(buf, PAGE_SIZE,
"Unused: %zu%%\n" "Unused: %zu%%\n"
"Clean: %zu%%\n"
"Dirty: %zu%%\n"
"Metadata: %zu%%\n" "Metadata: %zu%%\n"
"Average: %llu\n" "Average: %llu\n"
"Sectors per Q: %zu\n" "Sectors per Q: %zu\n"
"Quantiles: [", "Quantiles: [",
unused * 100 / (size_t) ca->sb.nbuckets, unused * 100 / (size_t) ca->sb.nbuckets,
btree * 100 / (size_t) ca->sb.nbuckets, sum, available * 100 / (size_t) ca->sb.nbuckets,
dirty * 100 / (size_t) ca->sb.nbuckets,
meta * 100 / (size_t) ca->sb.nbuckets, sum,
n * ca->sb.bucket_size / (ARRAY_SIZE(q) + 1)); n * ca->sb.bucket_size / (ARRAY_SIZE(q) + 1));
for (i = 0; i < ARRAY_SIZE(q); i++) for (i = 0; i < ARRAY_SIZE(q); i++)
......
...@@ -45,7 +45,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_node_split); ...@@ -45,7 +45,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_node_split);
EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_node_compact); EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_node_compact);
EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_set_root); EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_set_root);
EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_alloc_invalidate); EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_invalidate);
EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_alloc_fail); EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_alloc_fail);
EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_writeback); EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_writeback);
......
...@@ -399,26 +399,43 @@ TRACE_EVENT(bcache_keyscan, ...@@ -399,26 +399,43 @@ TRACE_EVENT(bcache_keyscan,
/* Allocator */ /* Allocator */
TRACE_EVENT(bcache_alloc_invalidate, TRACE_EVENT(bcache_invalidate,
TP_PROTO(struct cache *ca), TP_PROTO(struct cache *ca, size_t bucket),
TP_ARGS(ca), TP_ARGS(ca, bucket),
TP_STRUCT__entry( TP_STRUCT__entry(
__field(unsigned, free ) __field(unsigned, sectors )
__field(unsigned, free_inc ) __field(dev_t, dev )
__field(unsigned, free_inc_size ) __field(__u64, offset )
__field(unsigned, unused )
), ),
TP_fast_assign( TP_fast_assign(
__entry->free = fifo_used(&ca->free[RESERVE_NONE]); __entry->dev = ca->bdev->bd_dev;
__entry->free_inc = fifo_used(&ca->free_inc); __entry->offset = bucket << ca->set->bucket_bits;
__entry->free_inc_size = ca->free_inc.size; __entry->sectors = GC_SECTORS_USED(&ca->buckets[bucket]);
__entry->unused = fifo_used(&ca->unused);
), ),
TP_printk("free %u free_inc %u/%u unused %u", __entry->free, TP_printk("invalidated %u sectors at %d,%d sector=%llu",
__entry->free_inc, __entry->free_inc_size, __entry->unused) __entry->sectors, MAJOR(__entry->dev),
MINOR(__entry->dev), __entry->offset)
);
TRACE_EVENT(bcache_alloc,
TP_PROTO(struct cache *ca, size_t bucket),
TP_ARGS(ca, bucket),
TP_STRUCT__entry(
__field(dev_t, dev )
__field(__u64, offset )
),
TP_fast_assign(
__entry->dev = ca->bdev->bd_dev;
__entry->offset = bucket << ca->set->bucket_bits;
),
TP_printk("allocated %d,%d sector=%llu", MAJOR(__entry->dev),
MINOR(__entry->dev), __entry->offset)
); );
TRACE_EVENT(bcache_alloc_fail, TRACE_EVENT(bcache_alloc_fail,
...@@ -426,21 +443,22 @@ TRACE_EVENT(bcache_alloc_fail, ...@@ -426,21 +443,22 @@ TRACE_EVENT(bcache_alloc_fail,
TP_ARGS(ca, reserve), TP_ARGS(ca, reserve),
TP_STRUCT__entry( TP_STRUCT__entry(
__field(dev_t, dev )
__field(unsigned, free ) __field(unsigned, free )
__field(unsigned, free_inc ) __field(unsigned, free_inc )
__field(unsigned, unused )
__field(unsigned, blocked ) __field(unsigned, blocked )
), ),
TP_fast_assign( TP_fast_assign(
__entry->dev = ca->bdev->bd_dev;
__entry->free = fifo_used(&ca->free[reserve]); __entry->free = fifo_used(&ca->free[reserve]);
__entry->free_inc = fifo_used(&ca->free_inc); __entry->free_inc = fifo_used(&ca->free_inc);
__entry->unused = fifo_used(&ca->unused);
__entry->blocked = atomic_read(&ca->set->prio_blocked); __entry->blocked = atomic_read(&ca->set->prio_blocked);
), ),
TP_printk("free %u free_inc %u unused %u blocked %u", __entry->free, TP_printk("alloc fail %d,%d free %u free_inc %u blocked %u",
__entry->free_inc, __entry->unused, __entry->blocked) MAJOR(__entry->dev), MINOR(__entry->dev), __entry->free,
__entry->free_inc, __entry->blocked)
); );
/* Background writeback */ /* Background writeback */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment