Commit e84987a1 authored by Jens Axboe

Merge branch 'bcache-for-3.15' of git://evilpiepirate.org/~kent/linux-bcache into for-3.15/drivers

Kent writes:

Jens, here's the bcache changes for 3.15. Lots of bugfixes, and some
refactoring and cleanups.
parents 5eb9291c cb851149
......@@ -24,11 +24,3 @@ config BCACHE_CLOSURES_DEBUG
Keeps all active closures in a linked list and provides a debugfs
interface to list them, which makes it possible to see asynchronous
operations that get stuck.
# cgroup code needs to be updated:
#
#config CGROUP_BCACHE
# bool "Cgroup controls for bcache"
# depends on BCACHE && BLK_CGROUP
# ---help---
# TODO
......@@ -78,12 +78,6 @@ uint8_t bch_inc_gen(struct cache *ca, struct bucket *b)
ca->set->need_gc = max(ca->set->need_gc, bucket_gc_gen(b));
WARN_ON_ONCE(ca->set->need_gc > BUCKET_GC_GEN_MAX);
if (CACHE_SYNC(&ca->set->sb)) {
ca->need_save_prio = max(ca->need_save_prio,
bucket_disk_gen(b));
WARN_ON_ONCE(ca->need_save_prio > BUCKET_DISK_GEN_MAX);
}
return ret;
}
......@@ -120,51 +114,45 @@ void bch_rescale_priorities(struct cache_set *c, int sectors)
mutex_unlock(&c->bucket_lock);
}
/* Allocation */
/*
* Background allocation thread: scans for buckets to be invalidated,
* invalidates them, rewrites prios/gens (marking them as invalidated on disk),
* then optionally issues discard commands to the newly free buckets, then puts
* them on the various freelists.
*/
static inline bool can_inc_bucket_gen(struct bucket *b)
{
return bucket_gc_gen(b) < BUCKET_GC_GEN_MAX &&
bucket_disk_gen(b) < BUCKET_DISK_GEN_MAX;
return bucket_gc_gen(b) < BUCKET_GC_GEN_MAX;
}
bool bch_bucket_add_unused(struct cache *ca, struct bucket *b)
bool bch_can_invalidate_bucket(struct cache *ca, struct bucket *b)
{
BUG_ON(GC_MARK(b) || GC_SECTORS_USED(b));
if (CACHE_REPLACEMENT(&ca->sb) == CACHE_REPLACEMENT_FIFO) {
unsigned i;
for (i = 0; i < RESERVE_NONE; i++)
if (!fifo_full(&ca->free[i]))
goto add;
BUG_ON(!ca->set->gc_mark_valid);
return false;
}
add:
b->prio = 0;
if (can_inc_bucket_gen(b) &&
fifo_push(&ca->unused, b - ca->buckets)) {
atomic_inc(&b->pin);
return true;
}
return false;
}
static bool can_invalidate_bucket(struct cache *ca, struct bucket *b)
{
return GC_MARK(b) == GC_MARK_RECLAIMABLE &&
return (!GC_MARK(b) ||
GC_MARK(b) == GC_MARK_RECLAIMABLE) &&
!atomic_read(&b->pin) &&
can_inc_bucket_gen(b);
}
static void invalidate_one_bucket(struct cache *ca, struct bucket *b)
void __bch_invalidate_one_bucket(struct cache *ca, struct bucket *b)
{
lockdep_assert_held(&ca->set->bucket_lock);
BUG_ON(GC_MARK(b) && GC_MARK(b) != GC_MARK_RECLAIMABLE);
if (GC_SECTORS_USED(b))
trace_bcache_invalidate(ca, b - ca->buckets);
bch_inc_gen(ca, b);
b->prio = INITIAL_PRIO;
atomic_inc(&b->pin);
}
/*
 * Invalidate bucket @b of cache @ca via __bch_invalidate_one_bucket() and
 * then queue its index (its offset within ca->buckets) on ca->free_inc.
 *
 * The result of fifo_push() is not checked here.
 */
static void bch_invalidate_one_bucket(struct cache *ca, struct bucket *b)
{
	long bucket_nr = b - ca->buckets;

	__bch_invalidate_one_bucket(ca, b);
	fifo_push(&ca->free_inc, bucket_nr);
}
......@@ -195,20 +183,7 @@ static void invalidate_buckets_lru(struct cache *ca)
ca->heap.used = 0;
for_each_bucket(b, ca) {
/*
* If we fill up the unused list, if we then return before
* adding anything to the free_inc list we'll skip writing
* prios/gens and just go back to allocating from the unused
* list:
*/
if (fifo_full(&ca->unused))
return;
if (!can_invalidate_bucket(ca, b))
continue;
if (!GC_SECTORS_USED(b) &&
bch_bucket_add_unused(ca, b))
if (!bch_can_invalidate_bucket(ca, b))
continue;
if (!heap_full(&ca->heap))
......@@ -233,7 +208,7 @@ static void invalidate_buckets_lru(struct cache *ca)
return;
}
invalidate_one_bucket(ca, b);
bch_invalidate_one_bucket(ca, b);
}
}
......@@ -249,8 +224,8 @@ static void invalidate_buckets_fifo(struct cache *ca)
b = ca->buckets + ca->fifo_last_bucket++;
if (can_invalidate_bucket(ca, b))
invalidate_one_bucket(ca, b);
if (bch_can_invalidate_bucket(ca, b))
bch_invalidate_one_bucket(ca, b);
if (++checked >= ca->sb.nbuckets) {
ca->invalidate_needs_gc = 1;
......@@ -274,8 +249,8 @@ static void invalidate_buckets_random(struct cache *ca)
b = ca->buckets + n;
if (can_invalidate_bucket(ca, b))
invalidate_one_bucket(ca, b);
if (bch_can_invalidate_bucket(ca, b))
bch_invalidate_one_bucket(ca, b);
if (++checked >= ca->sb.nbuckets / 2) {
ca->invalidate_needs_gc = 1;
......@@ -287,8 +262,7 @@ static void invalidate_buckets_random(struct cache *ca)
static void invalidate_buckets(struct cache *ca)
{
if (ca->invalidate_needs_gc)
return;
BUG_ON(ca->invalidate_needs_gc);
switch (CACHE_REPLACEMENT(&ca->sb)) {
case CACHE_REPLACEMENT_LRU:
......@@ -301,8 +275,6 @@ static void invalidate_buckets(struct cache *ca)
invalidate_buckets_random(ca);
break;
}
trace_bcache_alloc_invalidate(ca);
}
#define allocator_wait(ca, cond) \
......@@ -350,17 +322,10 @@ static int bch_allocator_thread(void *arg)
* possibly issue discards to them, then we add the bucket to
* the free list:
*/
while (1) {
while (!fifo_empty(&ca->free_inc)) {
long bucket;
if ((!atomic_read(&ca->set->prio_blocked) ||
!CACHE_SYNC(&ca->set->sb)) &&
!fifo_empty(&ca->unused))
fifo_pop(&ca->unused, bucket);
else if (!fifo_empty(&ca->free_inc))
fifo_pop(&ca->free_inc, bucket);
else
break;
fifo_pop(&ca->free_inc, bucket);
if (ca->discard) {
mutex_unlock(&ca->set->bucket_lock);
......@@ -371,6 +336,7 @@ static int bch_allocator_thread(void *arg)
}
allocator_wait(ca, bch_allocator_push(ca, bucket));
wake_up(&ca->set->btree_cache_wait);
wake_up(&ca->set->bucket_wait);
}
......@@ -380,9 +346,9 @@ static int bch_allocator_thread(void *arg)
* them to the free_inc list:
*/
retry_invalidate:
allocator_wait(ca, ca->set->gc_mark_valid &&
(ca->need_save_prio > 64 ||
!ca->invalidate_needs_gc));
!ca->invalidate_needs_gc);
invalidate_buckets(ca);
/*
......@@ -390,13 +356,28 @@ static int bch_allocator_thread(void *arg)
* new stuff to them:
*/
allocator_wait(ca, !atomic_read(&ca->set->prio_blocked));
if (CACHE_SYNC(&ca->set->sb) &&
(!fifo_empty(&ca->free_inc) ||
ca->need_save_prio > 64))
if (CACHE_SYNC(&ca->set->sb)) {
/*
* This could deadlock if an allocation with a btree
* node locked ever blocked - having the btree node
* locked would block garbage collection, but here we're
* waiting on garbage collection before we invalidate
* and free anything.
*
* But this should be safe since the btree code always
* uses btree_check_reserve() before allocating now, and
* if it fails it blocks without btree nodes locked.
*/
if (!fifo_full(&ca->free_inc))
goto retry_invalidate;
bch_prio_write(ca);
}
}
}
/* Allocation */
long bch_bucket_alloc(struct cache *ca, unsigned reserve, bool wait)
{
DEFINE_WAIT(w);
......@@ -408,8 +389,10 @@ long bch_bucket_alloc(struct cache *ca, unsigned reserve, bool wait)
fifo_pop(&ca->free[reserve], r))
goto out;
if (!wait)
if (!wait) {
trace_bcache_alloc_fail(ca, reserve);
return -1;
}
do {
prepare_to_wait(&ca->set->bucket_wait, &w,
......@@ -425,6 +408,8 @@ long bch_bucket_alloc(struct cache *ca, unsigned reserve, bool wait)
out:
wake_up_process(ca->alloc_thread);
trace_bcache_alloc(ca, reserve);
if (expensive_debug_checks(ca->set)) {
size_t iter;
long i;
......@@ -438,8 +423,6 @@ long bch_bucket_alloc(struct cache *ca, unsigned reserve, bool wait)
BUG_ON(i == r);
fifo_for_each(i, &ca->free_inc, iter)
BUG_ON(i == r);
fifo_for_each(i, &ca->unused, iter)
BUG_ON(i == r);
}
b = ca->buckets + r;
......@@ -461,17 +444,19 @@ long bch_bucket_alloc(struct cache *ca, unsigned reserve, bool wait)
return r;
}
/*
 * Clear the GC bookkeeping of bucket @b: both the GC_SECTORS_USED and the
 * GC_MARK bitfields (which live in b->gc_mark) are reset to 0.
 *
 * @ca is accepted but not referenced by this function.
 */
void __bch_bucket_free(struct cache *ca, struct bucket *b)
{
	SET_GC_SECTORS_USED(b, 0);
	SET_GC_MARK(b, 0);
}
void bch_bucket_free(struct cache_set *c, struct bkey *k)
{
unsigned i;
for (i = 0; i < KEY_PTRS(k); i++) {
struct bucket *b = PTR_BUCKET(c, k, i);
SET_GC_MARK(b, GC_MARK_RECLAIMABLE);
SET_GC_SECTORS_USED(b, 0);
bch_bucket_add_unused(PTR_CACHE(c, k, i), b);
}
for (i = 0; i < KEY_PTRS(k); i++)
__bch_bucket_free(PTR_CACHE(c, k, i),
PTR_BUCKET(c, k, i));
}
int __bch_bucket_alloc_set(struct cache_set *c, unsigned reserve,
......@@ -709,25 +694,3 @@ int bch_cache_allocator_start(struct cache *ca)
ca->alloc_thread = k;
return 0;
}
int bch_cache_allocator_init(struct cache *ca)
{
/*
* Reserve:
* Prio/gen writes first
* Then 8 for btree allocations
* Then half for the moving garbage collector
*/
#if 0
ca->watermark[WATERMARK_PRIO] = 0;
ca->watermark[WATERMARK_METADATA] = prio_buckets(ca);
ca->watermark[WATERMARK_MOVINGGC] = 8 +
ca->watermark[WATERMARK_METADATA];
ca->watermark[WATERMARK_NONE] = ca->free.size / 2 +
ca->watermark[WATERMARK_MOVINGGC];
#endif
return 0;
}
......@@ -195,9 +195,7 @@ struct bucket {
atomic_t pin;
uint16_t prio;
uint8_t gen;
uint8_t disk_gen;
uint8_t last_gc; /* Most out of date gen in the btree */
uint8_t gc_gen;
uint16_t gc_mark; /* Bitfield used by GC. See below for field */
};
......@@ -207,9 +205,9 @@ struct bucket {
*/
BITMASK(GC_MARK, struct bucket, gc_mark, 0, 2);
#define GC_MARK_RECLAIMABLE 0
#define GC_MARK_DIRTY 1
#define GC_MARK_METADATA 2
#define GC_MARK_RECLAIMABLE 1
#define GC_MARK_DIRTY 2
#define GC_MARK_METADATA 3
#define GC_SECTORS_USED_SIZE 13
#define MAX_GC_SECTORS_USED (~(~0ULL << GC_SECTORS_USED_SIZE))
BITMASK(GC_SECTORS_USED, struct bucket, gc_mark, 2, GC_SECTORS_USED_SIZE);
......@@ -426,14 +424,9 @@ struct cache {
* their new gen to disk. After prio_write() finishes writing the new
* gens/prios, they'll be moved to the free list (and possibly discarded
* in the process)
*
* unused: GC found nothing pointing into these buckets (possibly
* because all the data they contained was overwritten), so we only
* need to discard them before they can be moved to the free list.
*/
DECLARE_FIFO(long, free)[RESERVE_NR];
DECLARE_FIFO(long, free_inc);
DECLARE_FIFO(long, unused);
size_t fifo_last_bucket;
......@@ -442,12 +435,6 @@ struct cache {
DECLARE_HEAP(struct bucket *, heap);
/*
* max(gen - disk_gen) for all buckets. When it gets too big we have to
* call prio_write() to keep gens from wrapping.
*/
uint8_t need_save_prio;
/*
* If nonzero, we know we aren't going to find any buckets to invalidate
* until a gc finishes - otherwise we could pointlessly burn a ton of
......@@ -562,19 +549,16 @@ struct cache_set {
struct list_head btree_cache_freed;
/* Number of elements in btree_cache + btree_cache_freeable lists */
unsigned bucket_cache_used;
unsigned btree_cache_used;
/*
* If we need to allocate memory for a new btree node and that
* allocation fails, we can cannibalize another node in the btree cache
* to satisfy the allocation. However, only one thread can be doing this
* at a time, for obvious reasons - try_harder and try_wait are
* basically a lock for this that we can wait on asynchronously. The
* btree_root() macro releases the lock when it returns.
* to satisfy the allocation - lock to guarantee only one thread does
* this at a time:
*/
struct task_struct *try_harder;
wait_queue_head_t try_wait;
uint64_t try_harder_start;
wait_queue_head_t btree_cache_wait;
struct task_struct *btree_cache_alloc_lock;
/*
* When we free a btree node, we increment the gen of the bucket the
......@@ -603,7 +587,7 @@ struct cache_set {
uint16_t min_prio;
/*
* max(gen - gc_gen) for all buckets. When it gets too big we have to gc
* max(gen - last_gc) for all buckets. When it gets too big we have to gc
* to keep gens from wrapping around.
*/
uint8_t need_gc;
......@@ -628,6 +612,8 @@ struct cache_set {
/* Number of moving GC bios in flight */
struct semaphore moving_in_flight;
struct workqueue_struct *moving_gc_wq;
struct btree *root;
#ifdef CONFIG_BCACHE_DEBUG
......@@ -667,7 +653,6 @@ struct cache_set {
struct time_stats btree_gc_time;
struct time_stats btree_split_time;
struct time_stats btree_read_time;
struct time_stats try_harder_time;
atomic_long_t cache_read_races;
atomic_long_t writeback_keys_done;
......@@ -850,9 +835,6 @@ static inline bool cached_dev_get(struct cached_dev *dc)
/*
* bucket_gc_gen() returns the difference between the bucket's current gen and
* the oldest gen of any pointer into that bucket in the btree (last_gc).
*
* bucket_disk_gen() returns the difference between the current gen and the gen
* on disk; they're both used to make sure gens don't wrap around.
*/
static inline uint8_t bucket_gc_gen(struct bucket *b)
......@@ -860,13 +842,7 @@ static inline uint8_t bucket_gc_gen(struct bucket *b)
return b->gen - b->last_gc;
}
static inline uint8_t bucket_disk_gen(struct bucket *b)
{
return b->gen - b->disk_gen;
}
#define BUCKET_GC_GEN_MAX 96U
#define BUCKET_DISK_GEN_MAX 64U
#define kobj_attribute_write(n, fn) \
static struct kobj_attribute ksysfs_##n = __ATTR(n, S_IWUSR, NULL, fn)
......@@ -899,11 +875,14 @@ void bch_submit_bbio(struct bio *, struct cache_set *, struct bkey *, unsigned);
uint8_t bch_inc_gen(struct cache *, struct bucket *);
void bch_rescale_priorities(struct cache_set *, int);
bool bch_bucket_add_unused(struct cache *, struct bucket *);
long bch_bucket_alloc(struct cache *, unsigned, bool);
bool bch_can_invalidate_bucket(struct cache *, struct bucket *);
void __bch_invalidate_one_bucket(struct cache *, struct bucket *);
void __bch_bucket_free(struct cache *, struct bucket *);
void bch_bucket_free(struct cache_set *, struct bkey *);
long bch_bucket_alloc(struct cache *, unsigned, bool);
int __bch_bucket_alloc_set(struct cache_set *, unsigned,
struct bkey *, int, bool);
int bch_bucket_alloc_set(struct cache_set *, unsigned,
......@@ -954,13 +933,10 @@ int bch_open_buckets_alloc(struct cache_set *);
void bch_open_buckets_free(struct cache_set *);
int bch_cache_allocator_start(struct cache *ca);
int bch_cache_allocator_init(struct cache *ca);
void bch_debug_exit(void);
int bch_debug_init(struct kobject *);
void bch_request_exit(void);
int bch_request_init(void);
void bch_btree_exit(void);
int bch_btree_init(void);
#endif /* _BCACHE_H */
......@@ -23,8 +23,8 @@ void bch_dump_bset(struct btree_keys *b, struct bset *i, unsigned set)
for (k = i->start; k < bset_bkey_last(i); k = next) {
next = bkey_next(k);
printk(KERN_ERR "block %u key %li/%u: ", set,
(uint64_t *) k - i->d, i->keys);
printk(KERN_ERR "block %u key %u/%u: ", set,
(unsigned) ((u64 *) k - i->d), i->keys);
if (b->ops->key_dump)
b->ops->key_dump(b, k);
......
......@@ -478,6 +478,12 @@ static inline void bch_keylist_init(struct keylist *l)
l->top_p = l->keys_p = l->inline_keys;
}
/*
 * Point keylist @l at the single, caller-provided key @k: the list starts
 * at @k and its top is whatever bkey_next(@k) returns.
 */
static inline void bch_keylist_init_single(struct keylist *l, struct bkey *k)
{
	struct bkey *end = bkey_next(k);

	l->keys = k;
	l->top = end;
}
static inline void bch_keylist_push(struct keylist *l)
{
l->top = bkey_next(l->top);
......
......@@ -68,15 +68,11 @@
* alloc_bucket() cannot fail. This should be true but is not completely
* obvious.
*
* Make sure all allocations get charged to the root cgroup
*
* Plugging?
*
* If data write is less than hard sector size of ssd, round up offset in open
* bucket to the next whole sector
*
* Also lookup by cgroup in get_open_bucket()
*
* Superblock needs to be fleshed out for multiple cache devices
*
* Add a sysfs tunable for the number of writeback IOs in flight
......@@ -97,8 +93,6 @@
#define PTR_HASH(c, k) \
(((k)->ptr[0] >> c->bucket_bits) | PTR_GEN(k, 0))
static struct workqueue_struct *btree_io_wq;
#define insert_lock(s, b) ((b)->level <= (s)->lock)
/*
......@@ -123,7 +117,7 @@ static struct workqueue_struct *btree_io_wq;
({ \
int _r, l = (b)->level - 1; \
bool _w = l <= (op)->lock; \
struct btree *_child = bch_btree_node_get((b)->c, key, l, _w); \
struct btree *_child = bch_btree_node_get((b)->c, op, key, l, _w);\
if (!IS_ERR(_child)) { \
_child->parent = (b); \
_r = bch_btree_ ## fn(_child, op, ##__VA_ARGS__); \
......@@ -152,17 +146,12 @@ static struct workqueue_struct *btree_io_wq;
_r = bch_btree_ ## fn(_b, op, ##__VA_ARGS__); \
} \
rw_unlock(_w, _b); \
bch_cannibalize_unlock(c); \
if (_r == -EINTR) \
schedule(); \
bch_cannibalize_unlock(c); \
if (_r == -ENOSPC) { \
wait_event((c)->try_wait, \
!(c)->try_harder); \
_r = -EINTR; \
} \
} while (_r == -EINTR); \
\
finish_wait(&(c)->bucket_wait, &(op)->wait); \
finish_wait(&(c)->btree_cache_wait, &(op)->wait); \
_r; \
})
......@@ -171,6 +160,20 @@ static inline struct bset *write_block(struct btree *b)
return ((void *) btree_bset_first(b)) + b->written * block_bytes(b->c);
}
static void bch_btree_init_next(struct btree *b)
{
/* If not a leaf node, always sort */
if (b->level && b->keys.nsets)
bch_btree_sort(&b->keys, &b->c->sort);
else
bch_btree_sort_lazy(&b->keys, &b->c->sort);
if (b->written < btree_blocks(b))
bch_bset_init_next(&b->keys, write_block(b),
bset_magic(&b->c->sb));
}
/* Btree key manipulation */
void bkey_put(struct cache_set *c, struct bkey *k)
......@@ -352,8 +355,7 @@ static void __btree_node_write_done(struct closure *cl)
btree_complete_write(b, w);
if (btree_node_dirty(b))
queue_delayed_work(btree_io_wq, &b->work,
msecs_to_jiffies(30000));
schedule_delayed_work(&b->work, 30 * HZ);
closure_return_with_destructor(cl, btree_node_write_unlock);
}
......@@ -442,10 +444,12 @@ static void do_btree_node_write(struct btree *b)
}
}
void bch_btree_node_write(struct btree *b, struct closure *parent)
void __bch_btree_node_write(struct btree *b, struct closure *parent)
{
struct bset *i = btree_bset_last(b);
lockdep_assert_held(&b->write_lock);
trace_bcache_btree_write(b);
BUG_ON(current->bio_list);
......@@ -469,23 +473,24 @@ void bch_btree_node_write(struct btree *b, struct closure *parent)
&PTR_CACHE(b->c, &b->key, 0)->btree_sectors_written);
b->written += set_blocks(i, block_bytes(b->c));
}
/* If not a leaf node, always sort */
if (b->level && b->keys.nsets)
bch_btree_sort(&b->keys, &b->c->sort);
else
bch_btree_sort_lazy(&b->keys, &b->c->sort);
void bch_btree_node_write(struct btree *b, struct closure *parent)
{
unsigned nsets = b->keys.nsets;
lockdep_assert_held(&b->lock);
__bch_btree_node_write(b, parent);
/*
* do verify if there was more than one set initially (i.e. we did a
* sort) and we sorted down to a single set:
*/
if (i != b->keys.set->data && !b->keys.nsets)
if (nsets && !b->keys.nsets)
bch_btree_verify(b);
if (b->written < btree_blocks(b))
bch_bset_init_next(&b->keys, write_block(b),
bset_magic(&b->c->sb));
bch_btree_init_next(b);
}
static void bch_btree_node_write_sync(struct btree *b)
......@@ -493,7 +498,11 @@ static void bch_btree_node_write_sync(struct btree *b)
struct closure cl;
closure_init_stack(&cl);
mutex_lock(&b->write_lock);
bch_btree_node_write(b, &cl);
mutex_unlock(&b->write_lock);
closure_sync(&cl);
}
......@@ -501,11 +510,10 @@ static void btree_node_write_work(struct work_struct *w)
{
struct btree *b = container_of(to_delayed_work(w), struct btree, work);
rw_lock(true, b, b->level);
mutex_lock(&b->write_lock);
if (btree_node_dirty(b))
bch_btree_node_write(b, NULL);
rw_unlock(true, b);
__bch_btree_node_write(b, NULL);
mutex_unlock(&b->write_lock);
}
static void bch_btree_leaf_dirty(struct btree *b, atomic_t *journal_ref)
......@@ -513,11 +521,13 @@ static void bch_btree_leaf_dirty(struct btree *b, atomic_t *journal_ref)
struct bset *i = btree_bset_last(b);
struct btree_write *w = btree_current_write(b);
lockdep_assert_held(&b->write_lock);
BUG_ON(!b->written);
BUG_ON(!i->keys);
if (!btree_node_dirty(b))
queue_delayed_work(btree_io_wq, &b->work, 30 * HZ);
schedule_delayed_work(&b->work, 30 * HZ);
set_btree_node_dirty(b);
......@@ -548,7 +558,7 @@ static void bch_btree_leaf_dirty(struct btree *b, atomic_t *journal_ref)
#define mca_reserve(c) (((c->root && c->root->level) \
? c->root->level : 1) * 8 + 16)
#define mca_can_free(c) \
max_t(int, 0, c->bucket_cache_used - mca_reserve(c))
max_t(int, 0, c->btree_cache_used - mca_reserve(c))
static void mca_data_free(struct btree *b)
{
......@@ -556,7 +566,7 @@ static void mca_data_free(struct btree *b)
bch_btree_keys_free(&b->keys);
b->c->bucket_cache_used--;
b->c->btree_cache_used--;
list_move(&b->list, &b->c->btree_cache_freed);
}
......@@ -581,7 +591,7 @@ static void mca_data_alloc(struct btree *b, struct bkey *k, gfp_t gfp)
ilog2(b->c->btree_pages),
btree_order(k)),
gfp)) {
b->c->bucket_cache_used++;
b->c->btree_cache_used++;
list_move(&b->list, &b->c->btree_cache);
} else {
list_move(&b->list, &b->c->btree_cache_freed);
......@@ -597,6 +607,8 @@ static struct btree *mca_bucket_alloc(struct cache_set *c,
init_rwsem(&b->lock);
lockdep_set_novalidate_class(&b->lock);
mutex_init(&b->write_lock);
lockdep_set_novalidate_class(&b->write_lock);
INIT_LIST_HEAD(&b->list);
INIT_DELAYED_WORK(&b->work, btree_node_write_work);
b->c = c;
......@@ -630,8 +642,12 @@ static int mca_reap(struct btree *b, unsigned min_order, bool flush)
up(&b->io_mutex);
}
mutex_lock(&b->write_lock);
if (btree_node_dirty(b))
bch_btree_node_write_sync(b);
__bch_btree_node_write(b, &cl);
mutex_unlock(&b->write_lock);
closure_sync(&cl);
/* wait for any in flight btree write */
down(&b->io_mutex);
......@@ -654,7 +670,7 @@ static unsigned long bch_mca_scan(struct shrinker *shrink,
if (c->shrinker_disabled)
return SHRINK_STOP;
if (c->try_harder)
if (c->btree_cache_alloc_lock)
return SHRINK_STOP;
/* Return -1 if we can't do anything right now */
......@@ -686,7 +702,7 @@ static unsigned long bch_mca_scan(struct shrinker *shrink,
}
}
for (i = 0; (nr--) && i < c->bucket_cache_used; i++) {
for (i = 0; (nr--) && i < c->btree_cache_used; i++) {
if (list_empty(&c->btree_cache))
goto out;
......@@ -715,7 +731,7 @@ static unsigned long bch_mca_count(struct shrinker *shrink,
if (c->shrinker_disabled)
return 0;
if (c->try_harder)
if (c->btree_cache_alloc_lock)
return 0;
return mca_can_free(c) * c->btree_pages;
......@@ -819,17 +835,30 @@ static struct btree *mca_find(struct cache_set *c, struct bkey *k)
return b;
}
static struct btree *mca_cannibalize(struct cache_set *c, struct bkey *k)
/*
 * Try to become the owner of c->btree_cache_alloc_lock.
 *
 * The owner field is claimed with cmpxchg(NULL -> current).  The call
 * succeeds (returns 0) when the field was free or was already owned by the
 * current task.  Otherwise -EINTR is returned; if @op is non-NULL, op->wait
 * is first queued on c->btree_cache_wait in TASK_UNINTERRUPTIBLE state.
 */
static int mca_cannibalize_lock(struct cache_set *c, struct btree_op *op)
{
	struct task_struct *owner;

	owner = cmpxchg(&c->btree_cache_alloc_lock, NULL, current);
	if (!owner || owner == current)
		return 0;

	if (op)
		prepare_to_wait(&c->btree_cache_wait, &op->wait,
				TASK_UNINTERRUPTIBLE);

	return -EINTR;
}
static struct btree *mca_cannibalize(struct cache_set *c, struct btree_op *op,
struct bkey *k)
{
struct btree *b;
trace_bcache_btree_cache_cannibalize(c);
if (!c->try_harder) {
c->try_harder = current;
c->try_harder_start = local_clock();
} else if (c->try_harder != current)
return ERR_PTR(-ENOSPC);
if (mca_cannibalize_lock(c, op))
return ERR_PTR(-EINTR);
list_for_each_entry_reverse(b, &c->btree_cache, list)
if (!mca_reap(b, btree_order(k), false))
......@@ -839,6 +868,7 @@ static struct btree *mca_cannibalize(struct cache_set *c, struct bkey *k)
if (!mca_reap(b, btree_order(k), true))
return b;
WARN(1, "btree cache cannibalize failed\n");
return ERR_PTR(-ENOMEM);
}
......@@ -850,14 +880,14 @@ static struct btree *mca_cannibalize(struct cache_set *c, struct bkey *k)
*/
static void bch_cannibalize_unlock(struct cache_set *c)
{
if (c->try_harder == current) {
bch_time_stats_update(&c->try_harder_time, c->try_harder_start);
c->try_harder = NULL;
wake_up(&c->try_wait);
if (c->btree_cache_alloc_lock == current) {
c->btree_cache_alloc_lock = NULL;
wake_up(&c->btree_cache_wait);
}
}
static struct btree *mca_alloc(struct cache_set *c, struct bkey *k, int level)
static struct btree *mca_alloc(struct cache_set *c, struct btree_op *op,
struct bkey *k, int level)
{
struct btree *b;
......@@ -920,7 +950,7 @@ static struct btree *mca_alloc(struct cache_set *c, struct bkey *k, int level)
if (b)
rw_unlock(true, b);
b = mca_cannibalize(c, k);
b = mca_cannibalize(c, op, k);
if (!IS_ERR(b))
goto out;
......@@ -936,8 +966,8 @@ static struct btree *mca_alloc(struct cache_set *c, struct bkey *k, int level)
* The btree node will have either a read or a write lock held, depending on
* level and op->lock.
*/
struct btree *bch_btree_node_get(struct cache_set *c, struct bkey *k,
int level, bool write)
struct btree *bch_btree_node_get(struct cache_set *c, struct btree_op *op,
struct bkey *k, int level, bool write)
{
int i = 0;
struct btree *b;
......@@ -951,7 +981,7 @@ struct btree *bch_btree_node_get(struct cache_set *c, struct bkey *k,
return ERR_PTR(-EAGAIN);
mutex_lock(&c->bucket_lock);
b = mca_alloc(c, k, level);
b = mca_alloc(c, op, k, level);
mutex_unlock(&c->bucket_lock);
if (!b)
......@@ -997,7 +1027,7 @@ static void btree_node_prefetch(struct cache_set *c, struct bkey *k, int level)
struct btree *b;
mutex_lock(&c->bucket_lock);
b = mca_alloc(c, k, level);
b = mca_alloc(c, NULL, k, level);
mutex_unlock(&c->bucket_lock);
if (!IS_ERR_OR_NULL(b)) {
......@@ -1010,46 +1040,41 @@ static void btree_node_prefetch(struct cache_set *c, struct bkey *k, int level)
static void btree_node_free(struct btree *b)
{
unsigned i;
trace_bcache_btree_node_free(b);
BUG_ON(b == b->c->root);
mutex_lock(&b->write_lock);
if (btree_node_dirty(b))
btree_complete_write(b, btree_current_write(b));
clear_bit(BTREE_NODE_dirty, &b->flags);
mutex_unlock(&b->write_lock);
cancel_delayed_work(&b->work);
mutex_lock(&b->c->bucket_lock);
for (i = 0; i < KEY_PTRS(&b->key); i++) {
BUG_ON(atomic_read(&PTR_BUCKET(b->c, &b->key, i)->pin));
bch_inc_gen(PTR_CACHE(b->c, &b->key, i),
PTR_BUCKET(b->c, &b->key, i));
}
bch_bucket_free(b->c, &b->key);
mca_bucket_free(b);
mutex_unlock(&b->c->bucket_lock);
}
struct btree *bch_btree_node_alloc(struct cache_set *c, int level, bool wait)
struct btree *bch_btree_node_alloc(struct cache_set *c, struct btree_op *op,
int level)
{
BKEY_PADDED(key) k;
struct btree *b = ERR_PTR(-EAGAIN);
mutex_lock(&c->bucket_lock);
retry:
if (__bch_bucket_alloc_set(c, RESERVE_BTREE, &k.key, 1, wait))
if (__bch_bucket_alloc_set(c, RESERVE_BTREE, &k.key, 1, op != NULL))
goto err;
bkey_put(c, &k.key);
SET_KEY_SIZE(&k.key, c->btree_pages * PAGE_SECTORS);
b = mca_alloc(c, &k.key, level);
b = mca_alloc(c, op, &k.key, level);
if (IS_ERR(b))
goto err_free;
......@@ -1075,12 +1100,15 @@ struct btree *bch_btree_node_alloc(struct cache_set *c, int level, bool wait)
return b;
}
static struct btree *btree_node_alloc_replacement(struct btree *b, bool wait)
static struct btree *btree_node_alloc_replacement(struct btree *b,
struct btree_op *op)
{
struct btree *n = bch_btree_node_alloc(b->c, b->level, wait);
struct btree *n = bch_btree_node_alloc(b->c, op, b->level);
if (!IS_ERR_OR_NULL(n)) {
mutex_lock(&n->write_lock);
bch_btree_sort_into(&b->keys, &n->keys, &b->c->sort);
bkey_copy_key(&n->key, &b->key);
mutex_unlock(&n->write_lock);
}
return n;
......@@ -1090,43 +1118,47 @@ static void make_btree_freeing_key(struct btree *b, struct bkey *k)
{
unsigned i;
mutex_lock(&b->c->bucket_lock);
atomic_inc(&b->c->prio_blocked);
bkey_copy(k, &b->key);
bkey_copy_key(k, &ZERO_KEY);
for (i = 0; i < KEY_PTRS(k); i++) {
uint8_t g = PTR_BUCKET(b->c, k, i)->gen + 1;
SET_PTR_GEN(k, i, g);
}
for (i = 0; i < KEY_PTRS(k); i++)
SET_PTR_GEN(k, i,
bch_inc_gen(PTR_CACHE(b->c, &b->key, i),
PTR_BUCKET(b->c, &b->key, i)));
atomic_inc(&b->c->prio_blocked);
mutex_unlock(&b->c->bucket_lock);
}
static int btree_check_reserve(struct btree *b, struct btree_op *op)
{
struct cache_set *c = b->c;
struct cache *ca;
unsigned i, reserve = c->root->level * 2 + 1;
int ret = 0;
unsigned i, reserve = (c->root->level - b->level) * 2 + 1;
mutex_lock(&c->bucket_lock);
for_each_cache(ca, c, i)
if (fifo_used(&ca->free[RESERVE_BTREE]) < reserve) {
if (op)
prepare_to_wait(&c->bucket_wait, &op->wait,
prepare_to_wait(&c->btree_cache_wait, &op->wait,
TASK_UNINTERRUPTIBLE);
ret = -EINTR;
break;
mutex_unlock(&c->bucket_lock);
return -EINTR;
}
mutex_unlock(&c->bucket_lock);
return ret;
return mca_cannibalize_lock(b->c, op);
}
/* Garbage collection */
uint8_t __bch_btree_mark_key(struct cache_set *c, int level, struct bkey *k)
static uint8_t __bch_btree_mark_key(struct cache_set *c, int level,
struct bkey *k)
{
uint8_t stale = 0;
unsigned i;
......@@ -1146,8 +1178,8 @@ uint8_t __bch_btree_mark_key(struct cache_set *c, int level, struct bkey *k)
g = PTR_BUCKET(c, k, i);
if (gen_after(g->gc_gen, PTR_GEN(k, i)))
g->gc_gen = PTR_GEN(k, i);
if (gen_after(g->last_gc, PTR_GEN(k, i)))
g->last_gc = PTR_GEN(k, i);
if (ptr_stale(c, k, i)) {
stale = max(stale, ptr_stale(c, k, i));
......@@ -1163,6 +1195,8 @@ uint8_t __bch_btree_mark_key(struct cache_set *c, int level, struct bkey *k)
SET_GC_MARK(g, GC_MARK_METADATA);
else if (KEY_DIRTY(k))
SET_GC_MARK(g, GC_MARK_DIRTY);
else if (!GC_MARK(g))
SET_GC_MARK(g, GC_MARK_RECLAIMABLE);
/* guard against overflow */
SET_GC_SECTORS_USED(g, min_t(unsigned,
......@@ -1177,6 +1211,26 @@ uint8_t __bch_btree_mark_key(struct cache_set *c, int level, struct bkey *k)
#define btree_mark_key(b, k) __bch_btree_mark_key(b->c, b->level, k)
/*
 * Seed in-memory bucket state from key @k found at btree @level, then mark
 * the key with __bch_btree_mark_key().
 *
 * For every pointer of @k that is available and not stale, the bucket's gen
 * is set to the pointer's gen and its prio is adjusted:
 *   - @level != 0 and @k differs from ZERO_KEY: prio becomes BTREE_PRIO;
 *   - @level == 0 and prio is currently BTREE_PRIO: prio becomes
 *     INITIAL_PRIO.
 */
void bch_initial_mark_key(struct cache_set *c, int level, struct bkey *k)
{
	unsigned idx;

	for (idx = 0; idx < KEY_PTRS(k); idx++) {
		struct bucket *g;

		/* Skip pointers we cannot use; staleness is only checked
		 * for pointers that are available. */
		if (!ptr_available(c, k, idx) || ptr_stale(c, k, idx))
			continue;

		g = PTR_BUCKET(c, k, idx);
		g->gen = PTR_GEN(k, idx);

		if (level && bkey_cmp(k, &ZERO_KEY))
			g->prio = BTREE_PRIO;
		else if (!level && g->prio == BTREE_PRIO)
			g->prio = INITIAL_PRIO;
	}

	__bch_btree_mark_key(c, level, k);
}
static bool btree_gc_mark_node(struct btree *b, struct gc_stat *gc)
{
uint8_t stale = 0;
......@@ -1230,14 +1284,19 @@ static int bch_btree_insert_node(struct btree *, struct btree_op *,
struct keylist *, atomic_t *, struct bkey *);
static int btree_gc_coalesce(struct btree *b, struct btree_op *op,
struct keylist *keylist, struct gc_stat *gc,
struct gc_merge_info *r)
struct gc_stat *gc, struct gc_merge_info *r)
{
unsigned i, nodes = 0, keys = 0, blocks;
struct btree *new_nodes[GC_MERGE_NODES];
struct keylist keylist;
struct closure cl;
struct bkey *k;
bch_keylist_init(&keylist);
if (btree_check_reserve(b, NULL))
return 0;
memset(new_nodes, 0, sizeof(new_nodes));
closure_init_stack(&cl);
......@@ -1252,11 +1311,23 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op,
return 0;
for (i = 0; i < nodes; i++) {
new_nodes[i] = btree_node_alloc_replacement(r[i].b, false);
new_nodes[i] = btree_node_alloc_replacement(r[i].b, NULL);
if (IS_ERR_OR_NULL(new_nodes[i]))
goto out_nocoalesce;
}
/*
* We have to check the reserve here, after we've allocated our new
* nodes, to make sure the insert below will succeed - we also check
* before as an optimization to potentially avoid a bunch of expensive
* allocs/sorts
*/
if (btree_check_reserve(b, NULL))
goto out_nocoalesce;
for (i = 0; i < nodes; i++)
mutex_lock(&new_nodes[i]->write_lock);
for (i = nodes - 1; i > 0; --i) {
struct bset *n1 = btree_bset_first(new_nodes[i]);
struct bset *n2 = btree_bset_first(new_nodes[i - 1]);
......@@ -1315,28 +1386,34 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op,
n2->keys -= keys;
if (__bch_keylist_realloc(keylist,
if (__bch_keylist_realloc(&keylist,
bkey_u64s(&new_nodes[i]->key)))
goto out_nocoalesce;
bch_btree_node_write(new_nodes[i], &cl);
bch_keylist_add(keylist, &new_nodes[i]->key);
bch_keylist_add(&keylist, &new_nodes[i]->key);
}
for (i = 0; i < nodes; i++) {
if (__bch_keylist_realloc(keylist, bkey_u64s(&r[i].b->key)))
goto out_nocoalesce;
for (i = 0; i < nodes; i++)
mutex_unlock(&new_nodes[i]->write_lock);
make_btree_freeing_key(r[i].b, keylist->top);
bch_keylist_push(keylist);
}
closure_sync(&cl);
/* We emptied out this node */
BUG_ON(btree_bset_first(new_nodes[0])->keys);
btree_node_free(new_nodes[0]);
rw_unlock(true, new_nodes[0]);
closure_sync(&cl);
for (i = 0; i < nodes; i++) {
if (__bch_keylist_realloc(&keylist, bkey_u64s(&r[i].b->key)))
goto out_nocoalesce;
make_btree_freeing_key(r[i].b, keylist.top);
bch_keylist_push(&keylist);
}
bch_btree_insert_node(b, op, &keylist, NULL, NULL);
BUG_ON(!bch_keylist_empty(&keylist));
for (i = 0; i < nodes; i++) {
btree_node_free(r[i].b);
......@@ -1345,22 +1422,22 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op,
r[i].b = new_nodes[i];
}
bch_btree_insert_node(b, op, keylist, NULL, NULL);
BUG_ON(!bch_keylist_empty(keylist));
memmove(r, r + 1, sizeof(r[0]) * (nodes - 1));
r[nodes - 1].b = ERR_PTR(-EINTR);
trace_bcache_btree_gc_coalesce(nodes);
gc->nodes--;
bch_keylist_free(&keylist);
/* Invalidated our iterator */
return -EINTR;
out_nocoalesce:
closure_sync(&cl);
bch_keylist_free(&keylist);
while ((k = bch_keylist_pop(keylist)))
while ((k = bch_keylist_pop(&keylist)))
if (!bkey_cmp(k, &ZERO_KEY))
atomic_dec(&b->c->prio_blocked);
......@@ -1372,6 +1449,42 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op,
return 0;
}
/*
 * Rewrite @replace, a child of @b, into a newly allocated node during gc.
 *
 * Returns 0 when the rewrite is skipped: btree_check_reserve() returned
 * non-zero (before or after the allocation), or the replacement node could
 * not be allocated.  Returns -EINTR once the replacement has been inserted
 * into @b, because that insertion invalidates the caller's iterator.
 */
static int btree_gc_rewrite_node(struct btree *b, struct btree_op *op,
				 struct btree *replace)
{
	struct keylist keys;
	struct btree *n;

	if (btree_check_reserve(b, NULL))
		return 0;

	n = btree_node_alloc_replacement(replace, NULL);
	/*
	 * The allocation can fail; the other callers of
	 * btree_node_alloc_replacement() in this file check for an error
	 * pointer before touching the node, so do the same here instead of
	 * dereferencing it below.
	 */
	if (IS_ERR_OR_NULL(n))
		return 0;

	/* recheck reserve after allocating replacement node */
	if (btree_check_reserve(b, NULL)) {
		btree_node_free(n);
		rw_unlock(true, n);
		return 0;
	}

	bch_btree_node_write_sync(n);

	/* Build the two-key update: the new node's key plus a freeing key. */
	bch_keylist_init(&keys);
	bch_keylist_add(&keys, &n->key);

	make_btree_freeing_key(replace, keys.top);
	bch_keylist_push(&keys);

	bch_btree_insert_node(b, op, &keys, NULL, NULL);
	BUG_ON(!bch_keylist_empty(&keys));

	btree_node_free(replace);
	rw_unlock(true, n);

	/* Invalidated our iterator */
	return -EINTR;
}
static unsigned btree_gc_count_keys(struct btree *b)
{
struct bkey *k;
......@@ -1387,26 +1500,23 @@ static unsigned btree_gc_count_keys(struct btree *b)
static int btree_gc_recurse(struct btree *b, struct btree_op *op,
struct closure *writes, struct gc_stat *gc)
{
unsigned i;
int ret = 0;
bool should_rewrite;
struct btree *n;
struct bkey *k;
struct keylist keys;
struct btree_iter iter;
struct gc_merge_info r[GC_MERGE_NODES];
struct gc_merge_info *last = r + GC_MERGE_NODES - 1;
struct gc_merge_info *i, *last = r + ARRAY_SIZE(r) - 1;
bch_keylist_init(&keys);
bch_btree_iter_init(&b->keys, &iter, &b->c->gc_done);
for (i = 0; i < GC_MERGE_NODES; i++)
r[i].b = ERR_PTR(-EINTR);
for (i = r; i < r + ARRAY_SIZE(r); i++)
i->b = ERR_PTR(-EINTR);
while (1) {
k = bch_btree_iter_next_filter(&iter, &b->keys, bch_ptr_bad);
if (k) {
r->b = bch_btree_node_get(b->c, k, b->level - 1, true);
r->b = bch_btree_node_get(b->c, op, k, b->level - 1,
true);
if (IS_ERR(r->b)) {
ret = PTR_ERR(r->b);
break;
......@@ -1414,7 +1524,7 @@ static int btree_gc_recurse(struct btree *b, struct btree_op *op,
r->keys = btree_gc_count_keys(r->b);
ret = btree_gc_coalesce(b, op, &keys, gc, r);
ret = btree_gc_coalesce(b, op, gc, r);
if (ret)
break;
}
......@@ -1424,32 +1534,10 @@ static int btree_gc_recurse(struct btree *b, struct btree_op *op,
if (!IS_ERR(last->b)) {
should_rewrite = btree_gc_mark_node(last->b, gc);
if (should_rewrite &&
!btree_check_reserve(b, NULL)) {
n = btree_node_alloc_replacement(last->b,
false);
if (!IS_ERR_OR_NULL(n)) {
bch_btree_node_write_sync(n);
bch_keylist_add(&keys, &n->key);
make_btree_freeing_key(last->b,
keys.top);
bch_keylist_push(&keys);
btree_node_free(last->b);
bch_btree_insert_node(b, op, &keys,
NULL, NULL);
BUG_ON(!bch_keylist_empty(&keys));
rw_unlock(true, last->b);
last->b = n;
/* Invalidated our iterator */
ret = -EINTR;
if (should_rewrite) {
ret = btree_gc_rewrite_node(b, op, last->b);
if (ret)
break;
}
}
if (last->b->level) {
......@@ -1464,8 +1552,10 @@ static int btree_gc_recurse(struct btree *b, struct btree_op *op,
* Must flush leaf nodes before gc ends, since replace
* operations aren't journalled
*/
mutex_lock(&last->b->write_lock);
if (btree_node_dirty(last->b))
bch_btree_node_write(last->b, writes);
mutex_unlock(&last->b->write_lock);
rw_unlock(true, last->b);
}
......@@ -1478,15 +1568,15 @@ static int btree_gc_recurse(struct btree *b, struct btree_op *op,
}
}
for (i = 0; i < GC_MERGE_NODES; i++)
if (!IS_ERR_OR_NULL(r[i].b)) {
if (btree_node_dirty(r[i].b))
bch_btree_node_write(r[i].b, writes);
rw_unlock(true, r[i].b);
for (i = r; i < r + ARRAY_SIZE(r); i++)
if (!IS_ERR_OR_NULL(i->b)) {
mutex_lock(&i->b->write_lock);
if (btree_node_dirty(i->b))
bch_btree_node_write(i->b, writes);
mutex_unlock(&i->b->write_lock);
rw_unlock(true, i->b);
}
bch_keylist_free(&keys);
return ret;
}
......@@ -1499,10 +1589,11 @@ static int bch_btree_gc_root(struct btree *b, struct btree_op *op,
should_rewrite = btree_gc_mark_node(b, gc);
if (should_rewrite) {
n = btree_node_alloc_replacement(b, false);
n = btree_node_alloc_replacement(b, NULL);
if (!IS_ERR_OR_NULL(n)) {
bch_btree_node_write_sync(n);
bch_btree_set_root(n);
btree_node_free(b);
rw_unlock(true, n);
......@@ -1511,6 +1602,8 @@ static int bch_btree_gc_root(struct btree *b, struct btree_op *op,
}
}
__bch_btree_mark_key(b->c, b->level + 1, &b->key);
if (b->level) {
ret = btree_gc_recurse(b, op, writes, gc);
if (ret)
......@@ -1538,9 +1631,9 @@ static void btree_gc_start(struct cache_set *c)
for_each_cache(ca, c, i)
for_each_bucket(b, ca) {
b->gc_gen = b->gen;
b->last_gc = b->gen;
if (!atomic_read(&b->pin)) {
SET_GC_MARK(b, GC_MARK_RECLAIMABLE);
SET_GC_MARK(b, 0);
SET_GC_SECTORS_USED(b, 0);
}
}
......@@ -1548,7 +1641,7 @@ static void btree_gc_start(struct cache_set *c)
mutex_unlock(&c->bucket_lock);
}
size_t bch_btree_gc_finish(struct cache_set *c)
static size_t bch_btree_gc_finish(struct cache_set *c)
{
size_t available = 0;
struct bucket *b;
......@@ -1561,11 +1654,6 @@ size_t bch_btree_gc_finish(struct cache_set *c)
c->gc_mark_valid = 1;
c->need_gc = 0;
if (c->root)
for (i = 0; i < KEY_PTRS(&c->root->key); i++)
SET_GC_MARK(PTR_BUCKET(c, &c->root->key, i),
GC_MARK_METADATA);
for (i = 0; i < KEY_PTRS(&c->uuid_bucket); i++)
SET_GC_MARK(PTR_BUCKET(c, &c->uuid_bucket, i),
GC_MARK_METADATA);
......@@ -1605,15 +1693,15 @@ size_t bch_btree_gc_finish(struct cache_set *c)
SET_GC_MARK(ca->buckets + *i, GC_MARK_METADATA);
for_each_bucket(b, ca) {
b->last_gc = b->gc_gen;
c->need_gc = max(c->need_gc, bucket_gc_gen(b));
if (!atomic_read(&b->pin) &&
GC_MARK(b) == GC_MARK_RECLAIMABLE) {
if (atomic_read(&b->pin))
continue;
BUG_ON(!GC_MARK(b) && GC_SECTORS_USED(b));
if (!GC_MARK(b) || GC_MARK(b) == GC_MARK_RECLAIMABLE)
available++;
if (!GC_SECTORS_USED(b))
bch_bucket_add_unused(ca, b);
}
}
}
......@@ -1705,36 +1793,16 @@ int bch_gc_thread_start(struct cache_set *c)
/* Initial partial gc */
static int bch_btree_check_recurse(struct btree *b, struct btree_op *op,
unsigned long **seen)
static int bch_btree_check_recurse(struct btree *b, struct btree_op *op)
{
int ret = 0;
unsigned i;
struct bkey *k, *p = NULL;
struct bucket *g;
struct btree_iter iter;
for_each_key_filter(&b->keys, k, &iter, bch_ptr_invalid) {
for (i = 0; i < KEY_PTRS(k); i++) {
if (!ptr_available(b->c, k, i))
continue;
g = PTR_BUCKET(b->c, k, i);
for_each_key_filter(&b->keys, k, &iter, bch_ptr_invalid)
bch_initial_mark_key(b->c, b->level, k);
if (!__test_and_set_bit(PTR_BUCKET_NR(b->c, k, i),
seen[PTR_DEV(k, i)]) ||
!ptr_stale(b->c, k, i)) {
g->gen = PTR_GEN(k, i);
if (b->level)
g->prio = BTREE_PRIO;
else if (g->prio == BTREE_PRIO)
g->prio = INITIAL_PRIO;
}
}
btree_mark_key(b, k);
}
bch_initial_mark_key(b->c, b->level + 1, &b->key);
if (b->level) {
bch_btree_iter_init(&b->keys, &iter, NULL);
......@@ -1746,40 +1814,58 @@ static int bch_btree_check_recurse(struct btree *b, struct btree_op *op,
btree_node_prefetch(b->c, k, b->level - 1);
if (p)
ret = btree(check_recurse, p, b, op, seen);
ret = btree(check_recurse, p, b, op);
p = k;
} while (p && !ret);
}
return 0;
return ret;
}
int bch_btree_check(struct cache_set *c)
{
int ret = -ENOMEM;
unsigned i;
unsigned long *seen[MAX_CACHES_PER_SET];
struct btree_op op;
memset(seen, 0, sizeof(seen));
bch_btree_op_init(&op, SHRT_MAX);
for (i = 0; c->cache[i]; i++) {
size_t n = DIV_ROUND_UP(c->cache[i]->sb.nbuckets, 8);
seen[i] = kmalloc(n, GFP_KERNEL);
if (!seen[i])
goto err;
return btree_root(check_recurse, c, &op);
}
void bch_initial_gc_finish(struct cache_set *c)
{
struct cache *ca;
struct bucket *b;
unsigned i;
bch_btree_gc_finish(c);
/* Disables the seen array until prio_read() uses it too */
memset(seen[i], 0xFF, n);
mutex_lock(&c->bucket_lock);
/*
* We need to put some unused buckets directly on the prio freelist in
* order to get the allocator thread started - it needs freed buckets in
* order to rewrite the prios and gens, and it needs to rewrite prios
* and gens in order to free buckets.
*
* This is only safe for buckets that have no live data in them, which
* there should always be some of.
*/
for_each_cache(ca, c, i) {
for_each_bucket(b, ca) {
if (fifo_full(&ca->free[RESERVE_PRIO]))
break;
if (bch_can_invalidate_bucket(ca, b) &&
!GC_MARK(b)) {
__bch_invalidate_one_bucket(ca, b);
fifo_push(&ca->free[RESERVE_PRIO],
b - ca->buckets);
}
}
}
ret = btree_root(check_recurse, c, &op, seen);
err:
for (i = 0; i < MAX_CACHES_PER_SET; i++)
kfree(seen[i]);
return ret;
mutex_unlock(&c->bucket_lock);
}
/* Btree insertion */
......@@ -1871,11 +1957,14 @@ static int btree_split(struct btree *b, struct btree_op *op,
closure_init_stack(&cl);
bch_keylist_init(&parent_keys);
if (!b->level &&
btree_check_reserve(b, op))
return -EINTR;
if (btree_check_reserve(b, op)) {
if (!b->level)
return -EINTR;
else
WARN(1, "insufficient reserve for split\n");
}
n1 = btree_node_alloc_replacement(b, true);
n1 = btree_node_alloc_replacement(b, op);
if (IS_ERR(n1))
goto err;
......@@ -1887,16 +1976,19 @@ static int btree_split(struct btree *b, struct btree_op *op,
trace_bcache_btree_node_split(b, btree_bset_first(n1)->keys);
n2 = bch_btree_node_alloc(b->c, b->level, true);
n2 = bch_btree_node_alloc(b->c, op, b->level);
if (IS_ERR(n2))
goto err_free1;
if (!b->parent) {
n3 = bch_btree_node_alloc(b->c, b->level + 1, true);
n3 = bch_btree_node_alloc(b->c, op, b->level + 1);
if (IS_ERR(n3))
goto err_free2;
}
mutex_lock(&n1->write_lock);
mutex_lock(&n2->write_lock);
bch_btree_insert_keys(n1, op, insert_keys, replace_key);
/*
......@@ -1923,45 +2015,45 @@ static int btree_split(struct btree *b, struct btree_op *op,
bch_keylist_add(&parent_keys, &n2->key);
bch_btree_node_write(n2, &cl);
mutex_unlock(&n2->write_lock);
rw_unlock(true, n2);
} else {
trace_bcache_btree_node_compact(b, btree_bset_first(n1)->keys);
mutex_lock(&n1->write_lock);
bch_btree_insert_keys(n1, op, insert_keys, replace_key);
}
bch_keylist_add(&parent_keys, &n1->key);
bch_btree_node_write(n1, &cl);
mutex_unlock(&n1->write_lock);
if (n3) {
/* Depth increases, make a new root */
mutex_lock(&n3->write_lock);
bkey_copy_key(&n3->key, &MAX_KEY);
bch_btree_insert_keys(n3, op, &parent_keys, NULL);
bch_btree_node_write(n3, &cl);
mutex_unlock(&n3->write_lock);
closure_sync(&cl);
bch_btree_set_root(n3);
rw_unlock(true, n3);
btree_node_free(b);
} else if (!b->parent) {
/* Root filled up but didn't need to be split */
closure_sync(&cl);
bch_btree_set_root(n1);
btree_node_free(b);
} else {
/* Split a non root node */
closure_sync(&cl);
make_btree_freeing_key(b, parent_keys.top);
bch_keylist_push(&parent_keys);
btree_node_free(b);
bch_btree_insert_node(b->parent, op, &parent_keys, NULL, NULL);
BUG_ON(!bch_keylist_empty(&parent_keys));
}
btree_node_free(b);
rw_unlock(true, n1);
bch_time_stats_update(&b->c->btree_split_time, start_time);
......@@ -1976,7 +2068,7 @@ static int btree_split(struct btree *b, struct btree_op *op,
btree_node_free(n1);
rw_unlock(true, n1);
err:
WARN(1, "bcache: btree split failed");
WARN(1, "bcache: btree split failed (level %u)", b->level);
if (n3 == ERR_PTR(-EAGAIN) ||
n2 == ERR_PTR(-EAGAIN) ||
......@@ -1991,33 +2083,54 @@ static int bch_btree_insert_node(struct btree *b, struct btree_op *op,
atomic_t *journal_ref,
struct bkey *replace_key)
{
struct closure cl;
BUG_ON(b->level && replace_key);
closure_init_stack(&cl);
mutex_lock(&b->write_lock);
if (write_block(b) != btree_bset_last(b) &&
b->keys.last_set_unwritten)
bch_btree_init_next(b); /* just wrote a set */
if (bch_keylist_nkeys(insert_keys) > insert_u64s_remaining(b)) {
if (current->bio_list) {
op->lock = b->c->root->level + 1;
return -EAGAIN;
} else if (op->lock <= b->c->root->level) {
op->lock = b->c->root->level + 1;
return -EINTR;
} else {
/* Invalidated all iterators */
int ret = btree_split(b, op, insert_keys, replace_key);
mutex_unlock(&b->write_lock);
goto split;
}
return bch_keylist_empty(insert_keys) ?
0 : ret ?: -EINTR;
}
} else {
BUG_ON(write_block(b) != btree_bset_last(b));
BUG_ON(write_block(b) != btree_bset_last(b));
if (bch_btree_insert_keys(b, op, insert_keys, replace_key)) {
if (!b->level)
bch_btree_leaf_dirty(b, journal_ref);
else
bch_btree_node_write_sync(b);
}
if (bch_btree_insert_keys(b, op, insert_keys, replace_key)) {
if (!b->level)
bch_btree_leaf_dirty(b, journal_ref);
else
bch_btree_node_write(b, &cl);
}
return 0;
mutex_unlock(&b->write_lock);
/* wait for btree node write if necessary, after unlock */
closure_sync(&cl);
return 0;
split:
if (current->bio_list) {
op->lock = b->c->root->level + 1;
return -EAGAIN;
} else if (op->lock <= b->c->root->level) {
op->lock = b->c->root->level + 1;
return -EINTR;
} else {
/* Invalidated all iterators */
int ret = btree_split(b, op, insert_keys, replace_key);
if (bch_keylist_empty(insert_keys))
return 0;
else if (!ret)
return -EINTR;
return ret;
}
}
......@@ -2403,18 +2516,3 @@ void bch_keybuf_init(struct keybuf *buf)
spin_lock_init(&buf->lock);
array_allocator_init(&buf->freelist);
}
/* Tear down the btree I/O workqueue, if bch_btree_init() created one. */
void bch_btree_exit(void)
{
	if (!btree_io_wq)
		return;

	destroy_workqueue(btree_io_wq);
}
/*
 * Create the single-threaded "bch_btree_io" workqueue.
 *
 * Returns 0 on success, -ENOMEM if the workqueue could not be created.
 */
int __init bch_btree_init(void)
{
	btree_io_wq = create_singlethread_workqueue("bch_btree_io");

	return btree_io_wq ? 0 : -ENOMEM;
}
......@@ -127,6 +127,8 @@ struct btree {
struct cache_set *c;
struct btree *parent;
struct mutex write_lock;
unsigned long flags;
uint16_t written; /* would be nice to kill */
uint8_t level;
......@@ -236,11 +238,13 @@ static inline void rw_unlock(bool w, struct btree *b)
}
void bch_btree_node_read_done(struct btree *);
void __bch_btree_node_write(struct btree *, struct closure *);
void bch_btree_node_write(struct btree *, struct closure *);
void bch_btree_set_root(struct btree *);
struct btree *bch_btree_node_alloc(struct cache_set *, int, bool);
struct btree *bch_btree_node_get(struct cache_set *, struct bkey *, int, bool);
struct btree *bch_btree_node_alloc(struct cache_set *, struct btree_op *, int);
struct btree *bch_btree_node_get(struct cache_set *, struct btree_op *,
struct bkey *, int, bool);
int bch_btree_insert_check_key(struct btree *, struct btree_op *,
struct bkey *);
......@@ -248,10 +252,10 @@ int bch_btree_insert(struct cache_set *, struct keylist *,
atomic_t *, struct bkey *);
int bch_gc_thread_start(struct cache_set *);
size_t bch_btree_gc_finish(struct cache_set *);
void bch_initial_gc_finish(struct cache_set *);
void bch_moving_gc(struct cache_set *);
int bch_btree_check(struct cache_set *);
uint8_t __bch_btree_mark_key(struct cache_set *, int, struct bkey *);
void bch_initial_mark_key(struct cache_set *, int, struct bkey *);
static inline void wake_up_gc(struct cache_set *c)
{
......
......@@ -194,9 +194,9 @@ static bool btree_ptr_bad_expensive(struct btree *b, const struct bkey *k)
mutex_unlock(&b->c->bucket_lock);
bch_extent_to_text(buf, sizeof(buf), k);
btree_bug(b,
"inconsistent btree pointer %s: bucket %zi pin %i prio %i gen %i last_gc %i mark %llu gc_gen %i",
"inconsistent btree pointer %s: bucket %zi pin %i prio %i gen %i last_gc %i mark %llu",
buf, PTR_BUCKET_NR(b->c, k, i), atomic_read(&g->pin),
g->prio, g->gen, g->last_gc, GC_MARK(g), g->gc_gen);
g->prio, g->gen, g->last_gc, GC_MARK(g));
return true;
}
......@@ -308,6 +308,16 @@ static struct bkey *bch_extent_sort_fixup(struct btree_iter *iter,
return NULL;
}
/*
 * For a dirty key @k, call bcache_dev_sectors_dirty_add() on the key's inode
 * with a negated @sectors count starting at @offset.  Keys that are not dirty
 * are ignored.
 */
static void bch_subtract_dirty(struct bkey *k,
			       struct cache_set *c,
			       uint64_t offset,
			       int sectors)
{
	if (!KEY_DIRTY(k))
		return;

	bcache_dev_sectors_dirty_add(c, KEY_INODE(k), offset, -sectors);
}
static bool bch_extent_insert_fixup(struct btree_keys *b,
struct bkey *insert,
struct btree_iter *iter,
......@@ -315,13 +325,6 @@ static bool bch_extent_insert_fixup(struct btree_keys *b,
{
struct cache_set *c = container_of(b, struct btree, keys)->c;
void subtract_dirty(struct bkey *k, uint64_t offset, int sectors)
{
if (KEY_DIRTY(k))
bcache_dev_sectors_dirty_add(c, KEY_INODE(k),
offset, -sectors);
}
uint64_t old_offset;
unsigned old_size, sectors_found = 0;
......@@ -398,7 +401,8 @@ static bool bch_extent_insert_fixup(struct btree_keys *b,
struct bkey *top;
subtract_dirty(k, KEY_START(insert), KEY_SIZE(insert));
bch_subtract_dirty(k, c, KEY_START(insert),
KEY_SIZE(insert));
if (bkey_written(b, k)) {
/*
......@@ -448,7 +452,7 @@ static bool bch_extent_insert_fixup(struct btree_keys *b,
}
}
subtract_dirty(k, old_offset, old_size - KEY_SIZE(k));
bch_subtract_dirty(k, c, old_offset, old_size - KEY_SIZE(k));
}
check_failed:
......@@ -499,9 +503,9 @@ static bool bch_extent_bad_expensive(struct btree *b, const struct bkey *k,
if (mutex_trylock(&b->c->bucket_lock)) {
if (b->c->gc_mark_valid &&
((GC_MARK(g) != GC_MARK_DIRTY &&
KEY_DIRTY(k)) ||
GC_MARK(g) == GC_MARK_METADATA))
(!GC_MARK(g) ||
GC_MARK(g) == GC_MARK_METADATA ||
(GC_MARK(g) != GC_MARK_DIRTY && KEY_DIRTY(k))))
goto err;
if (g->prio == BTREE_PRIO)
......@@ -515,9 +519,9 @@ static bool bch_extent_bad_expensive(struct btree *b, const struct bkey *k,
mutex_unlock(&b->c->bucket_lock);
bch_extent_to_text(buf, sizeof(buf), k);
btree_bug(b,
"inconsistent extent pointer %s:\nbucket %zu pin %i prio %i gen %i last_gc %i mark %llu gc_gen %i",
"inconsistent extent pointer %s:\nbucket %zu pin %i prio %i gen %i last_gc %i mark %llu",
buf, PTR_BUCKET_NR(b->c, k, ptr), atomic_read(&g->pin),
g->prio, g->gen, g->last_gc, GC_MARK(g), g->gc_gen);
g->prio, g->gen, g->last_gc, GC_MARK(g));
return true;
}
......
......@@ -237,8 +237,14 @@ int bch_journal_read(struct cache_set *c, struct list_head *list)
for (i = 0; i < ca->sb.njournal_buckets; i++)
if (ja->seq[i] > seq) {
seq = ja->seq[i];
ja->cur_idx = ja->discard_idx =
ja->last_idx = i;
/*
* When journal_reclaim() goes to allocate for
* the first time, it'll use the bucket after
* ja->cur_idx
*/
ja->cur_idx = i;
ja->last_idx = ja->discard_idx = (i + 1) %
ca->sb.njournal_buckets;
}
}
......@@ -288,16 +294,11 @@ void bch_journal_mark(struct cache_set *c, struct list_head *list)
k = bkey_next(k)) {
unsigned j;
for (j = 0; j < KEY_PTRS(k); j++) {
struct bucket *g = PTR_BUCKET(c, k, j);
atomic_inc(&g->pin);
for (j = 0; j < KEY_PTRS(k); j++)
if (ptr_available(c, k, j))
atomic_inc(&PTR_BUCKET(c, k, j)->pin);
if (g->prio == BTREE_PRIO &&
!ptr_stale(c, k, j))
g->prio = INITIAL_PRIO;
}
__bch_btree_mark_key(c, 0, k);
bch_initial_mark_key(c, 0, k);
}
}
}
......@@ -312,8 +313,6 @@ int bch_journal_replay(struct cache_set *s, struct list_head *list)
uint64_t start = i->j.last_seq, end = i->j.seq, n = start;
struct keylist keylist;
bch_keylist_init(&keylist);
list_for_each_entry(i, list, list) {
BUG_ON(i->pin && atomic_read(i->pin) != 1);
......@@ -326,8 +325,7 @@ int bch_journal_replay(struct cache_set *s, struct list_head *list)
k = bkey_next(k)) {
trace_bcache_journal_replay_key(k);
bkey_copy(keylist.top, k);
bch_keylist_push(&keylist);
bch_keylist_init_single(&keylist, k);
ret = bch_btree_insert(s, &keylist, i->pin, NULL);
if (ret)
......@@ -383,16 +381,15 @@ static void btree_flush_write(struct cache_set *c)
b = best;
if (b) {
rw_lock(true, b, b->level);
mutex_lock(&b->write_lock);
if (!btree_current_write(b)->journal) {
rw_unlock(true, b);
mutex_unlock(&b->write_lock);
/* We raced */
goto retry;
}
bch_btree_node_write(b, NULL);
rw_unlock(true, b);
__bch_btree_node_write(b, NULL);
mutex_unlock(&b->write_lock);
}
}
......@@ -536,6 +533,7 @@ void bch_journal_next(struct journal *j)
atomic_set(&fifo_back(&j->pin), 1);
j->cur->data->seq = ++j->seq;
j->cur->dirty = false;
j->cur->need_write = false;
j->cur->data->keys = 0;
......@@ -731,7 +729,10 @@ static void journal_write_work(struct work_struct *work)
struct cache_set,
journal.work);
spin_lock(&c->journal.lock);
journal_try_write(c);
if (c->journal.cur->dirty)
journal_try_write(c);
else
spin_unlock(&c->journal.lock);
}
/*
......@@ -761,7 +762,8 @@ atomic_t *bch_journal(struct cache_set *c,
if (parent) {
closure_wait(&w->wait, parent);
journal_try_write(c);
} else if (!w->need_write) {
} else if (!w->dirty) {
w->dirty = true;
schedule_delayed_work(&c->journal.work,
msecs_to_jiffies(c->journal_delay_ms));
spin_unlock(&c->journal.lock);
......
......@@ -95,6 +95,7 @@ struct journal_write {
struct cache_set *c;
struct closure_waitlist wait;
bool dirty;
bool need_write;
};
......
......@@ -24,12 +24,10 @@ static bool moving_pred(struct keybuf *buf, struct bkey *k)
moving_gc_keys);
unsigned i;
for (i = 0; i < KEY_PTRS(k); i++) {
struct bucket *g = PTR_BUCKET(c, k, i);
if (GC_MOVE(g))
for (i = 0; i < KEY_PTRS(k); i++)
if (ptr_available(c, k, i) &&
GC_MOVE(PTR_BUCKET(c, k, i)))
return true;
}
return false;
}
......@@ -115,7 +113,7 @@ static void write_moving(struct closure *cl)
closure_call(&op->cl, bch_data_insert, NULL, cl);
}
continue_at(cl, write_moving_finish, system_wq);
continue_at(cl, write_moving_finish, op->wq);
}
static void read_moving_submit(struct closure *cl)
......@@ -125,7 +123,7 @@ static void read_moving_submit(struct closure *cl)
bch_submit_bbio(bio, io->op.c, &io->w->key, 0);
continue_at(cl, write_moving, system_wq);
continue_at(cl, write_moving, io->op.wq);
}
static void read_moving(struct cache_set *c)
......@@ -160,6 +158,7 @@ static void read_moving(struct cache_set *c)
io->w = w;
io->op.inode = KEY_INODE(&w->key);
io->op.c = c;
io->op.wq = c->moving_gc_wq;
moving_init(io);
bio = &io->bio.bio;
......@@ -216,7 +215,10 @@ void bch_moving_gc(struct cache_set *c)
ca->heap.used = 0;
for_each_bucket(b, ca) {
if (!GC_SECTORS_USED(b))
if (GC_MARK(b) == GC_MARK_METADATA ||
!GC_SECTORS_USED(b) ||
GC_SECTORS_USED(b) == ca->sb.bucket_size ||
atomic_read(&b->pin))
continue;
if (!heap_full(&ca->heap)) {
......
......@@ -12,11 +12,9 @@
#include "request.h"
#include "writeback.h"
#include <linux/cgroup.h>
#include <linux/module.h>
#include <linux/hash.h>
#include <linux/random.h>
#include "blk-cgroup.h"
#include <trace/events/bcache.h>
......@@ -27,171 +25,13 @@ struct kmem_cache *bch_search_cache;
static void bch_data_insert_start(struct closure *);
/* Cgroup interface */
#ifdef CONFIG_CGROUP_BCACHE
static struct bch_cgroup bcache_default_cgroup = { .cache_mode = -1 };
/*
 * Map a cgroup to its bcache per-cgroup state.
 *
 * Falls back to bcache_default_cgroup when @cgroup is NULL or when it has no
 * subsystem state for bcache_subsys_id.
 */
static struct bch_cgroup *cgroup_to_bcache(struct cgroup *cgroup)
{
	struct cgroup_subsys_state *state;

	if (!cgroup)
		return &bcache_default_cgroup;

	state = cgroup_subsys_state(cgroup, bcache_subsys_id);
	if (!state)
		return &bcache_default_cgroup;

	return container_of(state, struct bch_cgroup, css);
}
/*
 * Find the bcache per-cgroup state that applies to @bio.
 *
 * Uses the cgroup attached to the bio (bio->bi_css) when there is one,
 * otherwise the current task's subsystem state.  Returns
 * bcache_default_cgroup if no subsystem state is found.
 */
struct bch_cgroup *bch_bio_to_cgroup(struct bio *bio)
{
	struct cgroup_subsys_state *state;

	if (bio->bi_css)
		state = cgroup_subsys_state(bio->bi_css->cgroup,
					    bcache_subsys_id);
	else
		state = task_subsys_state(current, bcache_subsys_id);

	if (!state)
		return &bcache_default_cgroup;

	return container_of(state, struct bch_cgroup, css);
}
/*
 * cgroup "cache_mode" read handler: format the list of cache modes, with the
 * cgroup's current selection, and copy it to userspace.
 *
 * Returns the number of bytes copied by simple_read_from_buffer(), or the
 * negative value returned by bch_snprint_string_list().
 */
static ssize_t cache_mode_read(struct cgroup *cgrp, struct cftype *cft,
			       struct file *file,
			       char __user *buf, size_t nbytes, loff_t *ppos)
{
	char tmp[1024];
	/*
	 * Bound the formatter by the real size of the stack buffer; passing
	 * PAGE_SIZE here would let it write past the end of tmp[].
	 *
	 * cache_mode is stored as "index - 1" (see cache_mode_write()), hence
	 * the + 1 to get back to an index into bch_cache_modes[].
	 */
	int len = bch_snprint_string_list(tmp, sizeof(tmp), bch_cache_modes,
					  cgroup_to_bcache(cgrp)->cache_mode + 1);

	if (len < 0)
		return len;

	return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
}
static int cache_mode_write(struct cgroup *cgrp, struct cftype *cft,
const char *buf)
{
int v = bch_read_string_list(buf, bch_cache_modes);
if (v < 0)
return v;
cgroup_to_bcache(cgrp)->cache_mode = v - 1;
return 0;
}
static u64 bch_verify_read(struct cgroup *cgrp, struct cftype *cft)
{
return cgroup_to_bcache(cgrp)->verify;
}
/* cgroup "verify" write handler: store @val in the cgroup's verify flag. */
static int bch_verify_write(struct cgroup *cgrp, struct cftype *cft, u64 val)
{
	struct bch_cgroup *cg = cgroup_to_bcache(cgrp);

	cg->verify = val;

	return 0;
}
static u64 bch_cache_hits_read(struct cgroup *cgrp, struct cftype *cft)
{
struct bch_cgroup *bcachecg = cgroup_to_bcache(cgrp);
return atomic_read(&bcachecg->stats.cache_hits);
}
static u64 bch_cache_misses_read(struct cgroup *cgrp, struct cftype *cft)
{
struct bch_cgroup *bcachecg = cgroup_to_bcache(cgrp);
return atomic_read(&bcachecg->stats.cache_misses);
}
static u64 bch_cache_bypass_hits_read(struct cgroup *cgrp,
struct cftype *cft)
{
struct bch_cgroup *bcachecg = cgroup_to_bcache(cgrp);
return atomic_read(&bcachecg->stats.cache_bypass_hits);
}
static u64 bch_cache_bypass_misses_read(struct cgroup *cgrp,
struct cftype *cft)
{
struct bch_cgroup *bcachecg = cgroup_to_bcache(cgrp);
return atomic_read(&bcachecg->stats.cache_bypass_misses);
}
/*
 * Control files for the bcache cgroup subsystem; registered from
 * bch_request_init() via cgroup_add_cftypes().  The empty entry at the end
 * terminates the table.
 */
static struct cftype bch_files[] = {
/* Cache mode selection: readable and writable as a string. */
{
.name = "cache_mode",
.read = cache_mode_read,
.write_string = cache_mode_write,
},
/* Per-cgroup verify flag: readable and writable as a u64. */
{
.name = "verify",
.read_u64 = bch_verify_read,
.write_u64 = bch_verify_write,
},
/* Read-only statistics counters. */
{
.name = "cache_hits",
.read_u64 = bch_cache_hits_read,
},
{
.name = "cache_misses",
.read_u64 = bch_cache_misses_read,
},
{
.name = "cache_bypass_hits",
.read_u64 = bch_cache_bypass_hits_read,
},
{
.name = "cache_bypass_misses",
.read_u64 = bch_cache_bypass_misses_read,
},
{ } /* terminate */
};
/*
 * Initialize a bch_cgroup to its default state.  cache_mode is set to -1;
 * cache_mode() only honours a cgroup's value when it is >= 0, so -1 means
 * "fall back to the device's own cache mode".
 */
static void init_bch_cgroup(struct bch_cgroup *cg)
{
cg->cache_mode = -1;
}
static struct cgroup_subsys_state *bcachecg_create(struct cgroup *cgroup)
{
struct bch_cgroup *cg;
cg = kzalloc(sizeof(*cg), GFP_KERNEL);
if (!cg)
return ERR_PTR(-ENOMEM);
init_bch_cgroup(cg);
return &cg->css;
}
/*
 * cgroup_subsys .destroy callback: free the bch_cgroup that
 * cgroup_to_bcache() resolves for @cgroup.
 */
static void bcachecg_destroy(struct cgroup *cgroup)
{
	kfree(cgroup_to_bcache(cgroup));
}
/*
 * The bcache cgroup subsystem descriptor.  It is loaded in bch_request_init()
 * and unloaded in bch_request_exit(), and exported for GPL modules.
 */
struct cgroup_subsys bcache_subsys = {
.create = bcachecg_create,
.destroy = bcachecg_destroy,
.subsys_id = bcache_subsys_id,
.name = "bcache",
.module = THIS_MODULE,
};
EXPORT_SYMBOL_GPL(bcache_subsys);
#endif
/*
 * Pick the cache mode to use for @bio on @dc.
 *
 * With CONFIG_CGROUP_BCACHE, a non-negative cache_mode on the bio's cgroup
 * takes precedence; otherwise the mode from the device's superblock is used.
 */
static unsigned cache_mode(struct cached_dev *dc, struct bio *bio)
{
#ifdef CONFIG_CGROUP_BCACHE
	int cg_mode = bch_bio_to_cgroup(bio)->cache_mode;

	if (cg_mode >= 0)
		return cg_mode;
#endif
	return BDEV_CACHE_MODE(&dc->sb);
}
/*
 * Report whether verification is enabled for @bio on @dc.
 *
 * With CONFIG_CGROUP_BCACHE, a set verify flag on the bio's cgroup forces
 * true; otherwise the device's own verify setting is returned.
 */
static bool verify(struct cached_dev *dc, struct bio *bio)
{
#ifdef CONFIG_CGROUP_BCACHE
if (bch_bio_to_cgroup(bio)->verify)
return true;
#endif
return dc->verify;
}
......@@ -248,7 +88,7 @@ static void bch_data_insert_keys(struct closure *cl)
atomic_dec_bug(journal_ref);
if (!op->insert_data_done)
continue_at(cl, bch_data_insert_start, bcache_wq);
continue_at(cl, bch_data_insert_start, op->wq);
bch_keylist_free(&op->insert_keys);
closure_return(cl);
......@@ -297,7 +137,7 @@ static void bch_data_invalidate(struct closure *cl)
op->insert_data_done = true;
bio_put(bio);
out:
continue_at(cl, bch_data_insert_keys, bcache_wq);
continue_at(cl, bch_data_insert_keys, op->wq);
}
static void bch_data_insert_error(struct closure *cl)
......@@ -340,7 +180,7 @@ static void bch_data_insert_endio(struct bio *bio, int error)
if (op->writeback)
op->error = error;
else if (!op->replace)
set_closure_fn(cl, bch_data_insert_error, bcache_wq);
set_closure_fn(cl, bch_data_insert_error, op->wq);
else
set_closure_fn(cl, NULL, NULL);
}
......@@ -376,7 +216,7 @@ static void bch_data_insert_start(struct closure *cl)
if (bch_keylist_realloc(&op->insert_keys,
3 + (op->csum ? 1 : 0),
op->c))
continue_at(cl, bch_data_insert_keys, bcache_wq);
continue_at(cl, bch_data_insert_keys, op->wq);
k = op->insert_keys.top;
bkey_init(k);
......@@ -413,7 +253,7 @@ static void bch_data_insert_start(struct closure *cl)
} while (n != bio);
op->insert_data_done = true;
continue_at(cl, bch_data_insert_keys, bcache_wq);
continue_at(cl, bch_data_insert_keys, op->wq);
err:
/* bch_alloc_sectors() blocks if s->writeback = true */
BUG_ON(op->writeback);
......@@ -442,7 +282,7 @@ static void bch_data_insert_start(struct closure *cl)
bio_put(bio);
if (!bch_keylist_empty(&op->insert_keys))
continue_at(cl, bch_data_insert_keys, bcache_wq);
continue_at(cl, bch_data_insert_keys, op->wq);
else
closure_return(cl);
}
......@@ -824,6 +664,7 @@ static inline struct search *search_alloc(struct bio *bio,
s->iop.error = 0;
s->iop.flags = 0;
s->iop.flush_journal = (bio->bi_rw & (REQ_FLUSH|REQ_FUA)) != 0;
s->iop.wq = bcache_wq;
return s;
}
......@@ -1203,22 +1044,13 @@ void bch_cached_dev_request_init(struct cached_dev *dc)
static int flash_dev_cache_miss(struct btree *b, struct search *s,
struct bio *bio, unsigned sectors)
{
struct bio_vec bv;
struct bvec_iter iter;
/* Zero fill bio */
unsigned bytes = min(sectors, bio_sectors(bio)) << 9;
bio_for_each_segment(bv, bio, iter) {
unsigned j = min(bv.bv_len >> 9, sectors);
void *p = kmap(bv.bv_page);
memset(p + bv.bv_offset, 0, j << 9);
kunmap(bv.bv_page);
sectors -= j;
}
swap(bio->bi_iter.bi_size, bytes);
zero_fill_bio(bio);
swap(bio->bi_iter.bi_size, bytes);
bio_advance(bio, min(sectors << 9, bio->bi_iter.bi_size));
bio_advance(bio, bytes);
if (!bio->bi_iter.bi_size)
return MAP_DONE;
......@@ -1313,9 +1145,6 @@ void bch_flash_dev_request_init(struct bcache_device *d)
/*
 * Undo bch_request_init(): unload the bcache cgroup subsystem (only when
 * CONFIG_CGROUP_BCACHE is set) and destroy the search slab cache if it was
 * created.
 */
void bch_request_exit(void)
{
#ifdef CONFIG_CGROUP_BCACHE
cgroup_unload_subsys(&bcache_subsys);
#endif
/* bch_search_cache is NULL if bch_request_init() failed to create it */
if (bch_search_cache)
kmem_cache_destroy(bch_search_cache);
}
......@@ -1326,11 +1155,5 @@ int __init bch_request_init(void)
if (!bch_search_cache)
return -ENOMEM;
#ifdef CONFIG_CGROUP_BCACHE
cgroup_load_subsys(&bcache_subsys);
init_bch_cgroup(&bcache_default_cgroup);
cgroup_add_cftypes(&bcache_subsys, bch_files);
#endif
return 0;
}
#ifndef _BCACHE_REQUEST_H_
#define _BCACHE_REQUEST_H_
#include <linux/cgroup.h>
struct data_insert_op {
struct closure cl;
struct cache_set *c;
struct bio *bio;
struct workqueue_struct *wq;
unsigned inode;
uint16_t write_point;
......@@ -41,20 +40,4 @@ void bch_flash_dev_request_init(struct bcache_device *d);
extern struct kmem_cache *bch_search_cache, *bch_passthrough_cache;
/*
 * Per-cgroup bcache settings and statistics.  Returned by
 * bch_bio_to_cgroup() for the cgroup a bio belongs to.
 */
struct bch_cgroup {
#ifdef CONFIG_CGROUP_BCACHE
/* Embedded subsystem state; container_of() on this recovers the bch_cgroup */
struct cgroup_subsys_state css;
#endif
/*
* We subtract one from the index into bch_cache_modes[], so that
* default == -1; this makes it so the rest match up with d->cache_mode,
* and we use d->cache_mode if cgrp->cache_mode < 0
*/
short cache_mode;
/* When set, verify() in request.c returns true for bios in this cgroup */
bool verify;
/* Hit/miss counters exposed through the cgroup's read-only stat files */
struct cache_stat_collector stats;
};
struct bch_cgroup *bch_bio_to_cgroup(struct bio *bio);
#endif /* _BCACHE_REQUEST_H_ */
......@@ -201,9 +201,6 @@ void bch_mark_cache_accounting(struct cache_set *c, struct bcache_device *d,
struct cached_dev *dc = container_of(d, struct cached_dev, disk);
mark_cache_stats(&dc->accounting.collector, hit, bypass);
mark_cache_stats(&c->accounting.collector, hit, bypass);
#ifdef CONFIG_CGROUP_BCACHE
mark_cache_stats(&(bch_bio_to_cgroup(s->orig_bio)->stats), hit, bypass);
#endif
}
void bch_mark_cache_readahead(struct cache_set *c, struct bcache_device *d)
......
......@@ -541,9 +541,6 @@ static void prio_io(struct cache *ca, uint64_t bucket, unsigned long rw)
closure_sync(cl);
}
#define buckets_free(c) "free %zu, free_inc %zu, unused %zu", \
fifo_used(&c->free), fifo_used(&c->free_inc), fifo_used(&c->unused)
void bch_prio_write(struct cache *ca)
{
int i;
......@@ -554,10 +551,6 @@ void bch_prio_write(struct cache *ca)
lockdep_assert_held(&ca->set->bucket_lock);
for (b = ca->buckets;
b < ca->buckets + ca->sb.nbuckets; b++)
b->disk_gen = b->gen;
ca->disk_buckets->seq++;
atomic_long_add(ca->sb.bucket_size * prio_buckets(ca),
......@@ -601,14 +594,17 @@ void bch_prio_write(struct cache *ca)
mutex_lock(&ca->set->bucket_lock);
ca->need_save_prio = 0;
/*
* Don't want the old priorities to get garbage collected until after we
* finish writing the new ones, and they're journalled
*/
for (i = 0; i < prio_buckets(ca); i++)
for (i = 0; i < prio_buckets(ca); i++) {
if (ca->prio_last_buckets[i])
__bch_bucket_free(ca,
&ca->buckets[ca->prio_last_buckets[i]]);
ca->prio_last_buckets[i] = ca->prio_buckets[i];
}
}
static void prio_read(struct cache *ca, uint64_t bucket)
......@@ -639,7 +635,7 @@ static void prio_read(struct cache *ca, uint64_t bucket)
}
b->prio = le16_to_cpu(d->prio);
b->gen = b->disk_gen = b->last_gc = b->gc_gen = d->gen;
b->gen = b->last_gc = d->gen;
}
}
......@@ -843,6 +839,7 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size,
q->limits.max_segment_size = UINT_MAX;
q->limits.max_segments = BIO_MAX_PAGES;
q->limits.max_discard_sectors = UINT_MAX;
q->limits.discard_granularity = 512;
q->limits.io_min = block_size;
q->limits.logical_block_size = block_size;
q->limits.physical_block_size = block_size;
......@@ -1355,6 +1352,8 @@ static void cache_set_free(struct closure *cl)
bch_bset_sort_state_free(&c->sort);
free_pages((unsigned long) c->uuids, ilog2(bucket_pages(c)));
if (c->moving_gc_wq)
destroy_workqueue(c->moving_gc_wq);
if (c->bio_split)
bioset_free(c->bio_split);
if (c->fill_iter)
......@@ -1395,14 +1394,21 @@ static void cache_set_flush(struct closure *cl)
list_add(&c->root->list, &c->btree_cache);
/* Should skip this if we're unregistering because of an error */
list_for_each_entry(b, &c->btree_cache, list)
list_for_each_entry(b, &c->btree_cache, list) {
mutex_lock(&b->write_lock);
if (btree_node_dirty(b))
bch_btree_node_write(b, NULL);
__bch_btree_node_write(b, NULL);
mutex_unlock(&b->write_lock);
}
for_each_cache(ca, c, i)
if (ca->alloc_thread)
kthread_stop(ca->alloc_thread);
cancel_delayed_work_sync(&c->journal.work);
/* flush last journal entry if needed */
c->journal.work.work.func(&c->journal.work.work);
closure_return(cl);
}
......@@ -1485,14 +1491,13 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
sema_init(&c->sb_write_mutex, 1);
mutex_init(&c->bucket_lock);
init_waitqueue_head(&c->try_wait);
init_waitqueue_head(&c->btree_cache_wait);
init_waitqueue_head(&c->bucket_wait);
sema_init(&c->uuid_write_mutex, 1);
spin_lock_init(&c->btree_gc_time.lock);
spin_lock_init(&c->btree_split_time.lock);
spin_lock_init(&c->btree_read_time.lock);
spin_lock_init(&c->try_harder_time.lock);
bch_moving_init_cache_set(c);
......@@ -1517,6 +1522,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
!(c->fill_iter = mempool_create_kmalloc_pool(1, iter_size)) ||
!(c->bio_split = bioset_create(4, offsetof(struct bbio, bio))) ||
!(c->uuids = alloc_bucket_pages(GFP_KERNEL, c)) ||
!(c->moving_gc_wq = create_workqueue("bcache_gc")) ||
bch_journal_alloc(c) ||
bch_btree_cache_alloc(c) ||
bch_open_buckets_alloc(c) ||
......@@ -1580,7 +1586,7 @@ static void run_cache_set(struct cache_set *c)
goto err;
err = "error reading btree root";
c->root = bch_btree_node_get(c, k, j->btree_level, true);
c->root = bch_btree_node_get(c, NULL, k, j->btree_level, true);
if (IS_ERR_OR_NULL(c->root))
goto err;
......@@ -1596,7 +1602,7 @@ static void run_cache_set(struct cache_set *c)
goto err;
bch_journal_mark(c, &journal);
bch_btree_gc_finish(c);
bch_initial_gc_finish(c);
pr_debug("btree_check() done");
/*
......@@ -1638,7 +1644,7 @@ static void run_cache_set(struct cache_set *c)
ca->sb.d[j] = ca->sb.first_bucket + j;
}
bch_btree_gc_finish(c);
bch_initial_gc_finish(c);
err = "error starting allocator thread";
for_each_cache(ca, c, i)
......@@ -1655,12 +1661,14 @@ static void run_cache_set(struct cache_set *c)
goto err;
err = "cannot allocate new btree root";
c->root = bch_btree_node_alloc(c, 0, true);
c->root = bch_btree_node_alloc(c, NULL, 0);
if (IS_ERR_OR_NULL(c->root))
goto err;
mutex_lock(&c->root->write_lock);
bkey_copy_key(&c->root->key, &MAX_KEY);
bch_btree_node_write(c->root, &cl);
mutex_unlock(&c->root->write_lock);
bch_btree_set_root(c->root);
rw_unlock(true, c->root);
......@@ -1782,7 +1790,6 @@ void bch_cache_release(struct kobject *kobj)
vfree(ca->buckets);
free_heap(&ca->heap);
free_fifo(&ca->unused);
free_fifo(&ca->free_inc);
for (i = 0; i < RESERVE_NR; i++)
......@@ -1819,7 +1826,6 @@ static int cache_alloc(struct cache_sb *sb, struct cache *ca)
!init_fifo(&ca->free[RESERVE_MOVINGGC], free, GFP_KERNEL) ||
!init_fifo(&ca->free[RESERVE_NONE], free, GFP_KERNEL) ||
!init_fifo(&ca->free_inc, free << 2, GFP_KERNEL) ||
!init_fifo(&ca->unused, free << 2, GFP_KERNEL) ||
!init_heap(&ca->heap, free << 3, GFP_KERNEL) ||
!(ca->buckets = vzalloc(sizeof(struct bucket) *
ca->sb.nbuckets)) ||
......@@ -1834,13 +1840,7 @@ static int cache_alloc(struct cache_sb *sb, struct cache *ca)
for_each_bucket(b, ca)
atomic_set(&b->pin, 0);
if (bch_cache_allocator_init(ca))
goto err;
return 0;
err:
kobject_put(&ca->kobj);
return -ENOMEM;
}
static void register_cache(struct cache_sb *sb, struct page *sb_page,
......@@ -1869,7 +1869,10 @@ static void register_cache(struct cache_sb *sb, struct page *sb_page,
if (kobject_add(&ca->kobj, &part_to_dev(bdev->bd_part)->kobj, "bcache"))
goto err;
mutex_lock(&bch_register_lock);
err = register_cache_set(ca);
mutex_unlock(&bch_register_lock);
if (err)
goto err;
......@@ -1931,8 +1934,6 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
if (!try_module_get(THIS_MODULE))
return -EBUSY;
mutex_lock(&bch_register_lock);
if (!(path = kstrndup(buffer, size, GFP_KERNEL)) ||
!(sb = kmalloc(sizeof(struct cache_sb), GFP_KERNEL)))
goto err;
......@@ -1965,7 +1966,9 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
if (!dc)
goto err_close;
mutex_lock(&bch_register_lock);
register_bdev(sb, sb_page, bdev, dc);
mutex_unlock(&bch_register_lock);
} else {
struct cache *ca = kzalloc(sizeof(*ca), GFP_KERNEL);
if (!ca)
......@@ -1978,7 +1981,6 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
put_page(sb_page);
kfree(sb);
kfree(path);
mutex_unlock(&bch_register_lock);
module_put(THIS_MODULE);
return ret;
......@@ -2057,7 +2059,6 @@ static void bcache_exit(void)
{
bch_debug_exit();
bch_request_exit();
bch_btree_exit();
if (bcache_kobj)
kobject_put(bcache_kobj);
if (bcache_wq)
......@@ -2087,7 +2088,6 @@ static int __init bcache_init(void)
if (!(bcache_wq = create_workqueue("bcache")) ||
!(bcache_kobj = kobject_create_and_add("bcache", fs_kobj)) ||
sysfs_create_files(bcache_kobj, files) ||
bch_btree_init() ||
bch_request_init() ||
bch_debug_init(bcache_kobj))
goto err;
......
......@@ -54,7 +54,6 @@ sysfs_time_stats_attribute(btree_gc, sec, ms);
sysfs_time_stats_attribute(btree_split, sec, us);
sysfs_time_stats_attribute(btree_sort, ms, us);
sysfs_time_stats_attribute(btree_read, ms, us);
sysfs_time_stats_attribute(try_harder, ms, us);
read_attribute(btree_nodes);
read_attribute(btree_used_percent);
......@@ -406,7 +405,7 @@ struct bset_stats_op {
struct bset_stats stats;
};
static int btree_bset_stats(struct btree_op *b_op, struct btree *b)
static int bch_btree_bset_stats(struct btree_op *b_op, struct btree *b)
{
struct bset_stats_op *op = container_of(b_op, struct bset_stats_op, op);
......@@ -424,7 +423,7 @@ static int bch_bset_print_stats(struct cache_set *c, char *buf)
memset(&op, 0, sizeof(op));
bch_btree_op_init(&op.op, -1);
ret = bch_btree_map_nodes(&op.op, c, &ZERO_KEY, btree_bset_stats);
ret = bch_btree_map_nodes(&op.op, c, &ZERO_KEY, bch_btree_bset_stats);
if (ret < 0)
return ret;
......@@ -442,81 +441,81 @@ static int bch_bset_print_stats(struct cache_set *c, char *buf)
op.stats.floats, op.stats.failed);
}
SHOW(__bch_cache_set)
static unsigned bch_root_usage(struct cache_set *c)
{
unsigned root_usage(struct cache_set *c)
{
unsigned bytes = 0;
struct bkey *k;
struct btree *b;
struct btree_iter iter;
unsigned bytes = 0;
struct bkey *k;
struct btree *b;
struct btree_iter iter;
goto lock_root;
goto lock_root;
do {
rw_unlock(false, b);
do {
rw_unlock(false, b);
lock_root:
b = c->root;
rw_lock(false, b, b->level);
} while (b != c->root);
b = c->root;
rw_lock(false, b, b->level);
} while (b != c->root);
for_each_key_filter(&b->keys, k, &iter, bch_ptr_bad)
bytes += bkey_bytes(k);
for_each_key_filter(&b->keys, k, &iter, bch_ptr_bad)
bytes += bkey_bytes(k);
rw_unlock(false, b);
rw_unlock(false, b);
return (bytes * 100) / btree_bytes(c);
}
return (bytes * 100) / btree_bytes(c);
}
size_t cache_size(struct cache_set *c)
{
size_t ret = 0;
struct btree *b;
static size_t bch_cache_size(struct cache_set *c)
{
size_t ret = 0;
struct btree *b;
mutex_lock(&c->bucket_lock);
list_for_each_entry(b, &c->btree_cache, list)
ret += 1 << (b->keys.page_order + PAGE_SHIFT);
mutex_lock(&c->bucket_lock);
list_for_each_entry(b, &c->btree_cache, list)
ret += 1 << (b->keys.page_order + PAGE_SHIFT);
mutex_unlock(&c->bucket_lock);
return ret;
}
unsigned cache_max_chain(struct cache_set *c)
{
unsigned ret = 0;
struct hlist_head *h;
mutex_unlock(&c->bucket_lock);
return ret;
}
mutex_lock(&c->bucket_lock);
static unsigned bch_cache_max_chain(struct cache_set *c)
{
unsigned ret = 0;
struct hlist_head *h;
for (h = c->bucket_hash;
h < c->bucket_hash + (1 << BUCKET_HASH_BITS);
h++) {
unsigned i = 0;
struct hlist_node *p;
mutex_lock(&c->bucket_lock);
hlist_for_each(p, h)
i++;
for (h = c->bucket_hash;
h < c->bucket_hash + (1 << BUCKET_HASH_BITS);
h++) {
unsigned i = 0;
struct hlist_node *p;
ret = max(ret, i);
}
hlist_for_each(p, h)
i++;
mutex_unlock(&c->bucket_lock);
return ret;
ret = max(ret, i);
}
unsigned btree_used(struct cache_set *c)
{
return div64_u64(c->gc_stats.key_bytes * 100,
(c->gc_stats.nodes ?: 1) * btree_bytes(c));
}
mutex_unlock(&c->bucket_lock);
return ret;
}
unsigned average_key_size(struct cache_set *c)
{
return c->gc_stats.nkeys
? div64_u64(c->gc_stats.data, c->gc_stats.nkeys)
: 0;
}
/*
 * Percentage of btree space occupied by keys, reported through sysfs as
 * btree_used_percent.
 *
 * Computed from the gc statistics as key_bytes * 100 divided by
 * nodes * btree_bytes(c). A node count of zero is replaced by one (GNU
 * "?:" shorthand, which reads the field only once) so that the node count
 * never zeroes the divisor.
 */
static unsigned bch_btree_used(struct cache_set *c)
{
return div64_u64(c->gc_stats.key_bytes * 100,
(c->gc_stats.nodes ?: 1) * btree_bytes(c));
}
/*
 * Mean amount of data per key according to the gc statistics:
 * gc_stats.data divided by gc_stats.nkeys. Reported through sysfs as
 * average_key_size. Yields 0 when no keys have been counted, which also
 * avoids a division by zero.
 */
static unsigned bch_average_key_size(struct cache_set *c)
{
	/* Nothing counted yet: report zero rather than divide by zero. */
	if (!c->gc_stats.nkeys)
		return 0;

	return div64_u64(c->gc_stats.data, c->gc_stats.nkeys);
}
SHOW(__bch_cache_set)
{
struct cache_set *c = container_of(kobj, struct cache_set, kobj);
sysfs_print(synchronous, CACHE_SYNC(&c->sb));
......@@ -524,21 +523,20 @@ SHOW(__bch_cache_set)
sysfs_hprint(bucket_size, bucket_bytes(c));
sysfs_hprint(block_size, block_bytes(c));
sysfs_print(tree_depth, c->root->level);
sysfs_print(root_usage_percent, root_usage(c));
sysfs_print(root_usage_percent, bch_root_usage(c));
sysfs_hprint(btree_cache_size, cache_size(c));
sysfs_print(btree_cache_max_chain, cache_max_chain(c));
sysfs_hprint(btree_cache_size, bch_cache_size(c));
sysfs_print(btree_cache_max_chain, bch_cache_max_chain(c));
sysfs_print(cache_available_percent, 100 - c->gc_stats.in_use);
sysfs_print_time_stats(&c->btree_gc_time, btree_gc, sec, ms);
sysfs_print_time_stats(&c->btree_split_time, btree_split, sec, us);
sysfs_print_time_stats(&c->sort.time, btree_sort, ms, us);
sysfs_print_time_stats(&c->btree_read_time, btree_read, ms, us);
sysfs_print_time_stats(&c->try_harder_time, try_harder, ms, us);
sysfs_print(btree_used_percent, btree_used(c));
sysfs_print(btree_used_percent, bch_btree_used(c));
sysfs_print(btree_nodes, c->gc_stats.nodes);
sysfs_hprint(average_key_size, average_key_size(c));
sysfs_hprint(average_key_size, bch_average_key_size(c));
sysfs_print(cache_read_races,
atomic_long_read(&c->cache_read_races));
......@@ -709,7 +707,6 @@ static struct attribute *bch_cache_set_internal_files[] = {
sysfs_time_stats_attribute_list(btree_split, sec, us)
sysfs_time_stats_attribute_list(btree_sort, ms, us)
sysfs_time_stats_attribute_list(btree_read, ms, us)
sysfs_time_stats_attribute_list(try_harder, ms, us)
&sysfs_btree_nodes,
&sysfs_btree_used_percent,
......@@ -761,7 +758,9 @@ SHOW(__bch_cache)
/*
 * Three-way comparison of two uint16_t values with the operands reversed:
 * the result is positive when *r > *l, zero when they are equal and
 * negative when *r < *l. Both values are promoted to int before the
 * subtraction, so the difference cannot overflow.
 */
int cmp(const void *l, const void *r)
{
	const uint16_t *lhs = l;
	const uint16_t *rhs = r;

	return *rhs - *lhs;
}
size_t n = ca->sb.nbuckets, i, unused, btree;
struct bucket *b;
size_t n = ca->sb.nbuckets, i;
size_t unused = 0, available = 0, dirty = 0, meta = 0;
uint64_t sum = 0;
/* Compute 31 quantiles */
uint16_t q[31], *p, *cached;
......@@ -772,6 +771,17 @@ SHOW(__bch_cache)
return -ENOMEM;
mutex_lock(&ca->set->bucket_lock);
for_each_bucket(b, ca) {
if (!GC_SECTORS_USED(b))
unused++;
if (GC_MARK(b) == GC_MARK_RECLAIMABLE)
available++;
if (GC_MARK(b) == GC_MARK_DIRTY)
dirty++;
if (GC_MARK(b) == GC_MARK_METADATA)
meta++;
}
for (i = ca->sb.first_bucket; i < n; i++)
p[i] = ca->buckets[i].prio;
mutex_unlock(&ca->set->bucket_lock);
......@@ -786,10 +796,7 @@ SHOW(__bch_cache)
while (cached < p + n &&
*cached == BTREE_PRIO)
cached++;
btree = cached - p;
n -= btree;
cached++, n--;
for (i = 0; i < n; i++)
sum += INITIAL_PRIO - cached[i];
......@@ -805,12 +812,16 @@ SHOW(__bch_cache)
ret = scnprintf(buf, PAGE_SIZE,
"Unused: %zu%%\n"
"Clean: %zu%%\n"
"Dirty: %zu%%\n"
"Metadata: %zu%%\n"
"Average: %llu\n"
"Sectors per Q: %zu\n"
"Quantiles: [",
unused * 100 / (size_t) ca->sb.nbuckets,
btree * 100 / (size_t) ca->sb.nbuckets, sum,
available * 100 / (size_t) ca->sb.nbuckets,
dirty * 100 / (size_t) ca->sb.nbuckets,
meta * 100 / (size_t) ca->sb.nbuckets, sum,
n * ca->sb.bucket_size / (ARRAY_SIZE(q) + 1));
for (i = 0; i < ARRAY_SIZE(q); i++)
......
......@@ -45,7 +45,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_node_split);
EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_node_compact);
EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_set_root);
EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_alloc_invalidate);
EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_invalidate);
EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_alloc_fail);
EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_writeback);
......
......@@ -399,26 +399,43 @@ TRACE_EVENT(bcache_keyscan,
/* Allocator */
TRACE_EVENT(bcache_alloc_invalidate,
TP_PROTO(struct cache *ca),
TP_ARGS(ca),
TRACE_EVENT(bcache_invalidate,
TP_PROTO(struct cache *ca, size_t bucket),
TP_ARGS(ca, bucket),
TP_STRUCT__entry(
__field(unsigned, free )
__field(unsigned, free_inc )
__field(unsigned, free_inc_size )
__field(unsigned, unused )
__field(unsigned, sectors )
__field(dev_t, dev )
__field(__u64, offset )
),
TP_fast_assign(
__entry->free = fifo_used(&ca->free[RESERVE_NONE]);
__entry->free_inc = fifo_used(&ca->free_inc);
__entry->free_inc_size = ca->free_inc.size;
__entry->unused = fifo_used(&ca->unused);
__entry->dev = ca->bdev->bd_dev;
__entry->offset = bucket << ca->set->bucket_bits;
__entry->sectors = GC_SECTORS_USED(&ca->buckets[bucket]);
),
TP_printk("free %u free_inc %u/%u unused %u", __entry->free,
__entry->free_inc, __entry->free_inc_size, __entry->unused)
TP_printk("invalidated %u sectors at %d,%d sector=%llu",
__entry->sectors, MAJOR(__entry->dev),
MINOR(__entry->dev), __entry->offset)
);
/*
 * bcache_alloc tracepoint: takes a cache and a bucket index.
 *
 * Records the device number of the cache's block device and the bucket
 * index shifted left by the cache set's bucket_bits, and prints them as
 * "allocated MAJOR,MINOR sector=OFFSET".
 */
TRACE_EVENT(bcache_alloc,
TP_PROTO(struct cache *ca, size_t bucket),
TP_ARGS(ca, bucket),
TP_STRUCT__entry(
__field(dev_t, dev )
__field(__u64, offset )
),
TP_fast_assign(
__entry->dev = ca->bdev->bd_dev;
__entry->offset = bucket << ca->set->bucket_bits;
),
TP_printk("allocated %d,%d sector=%llu", MAJOR(__entry->dev),
MINOR(__entry->dev), __entry->offset)
);
TRACE_EVENT(bcache_alloc_fail,
......@@ -426,21 +443,22 @@ TRACE_EVENT(bcache_alloc_fail,
TP_ARGS(ca, reserve),
TP_STRUCT__entry(
__field(dev_t, dev )
__field(unsigned, free )
__field(unsigned, free_inc )
__field(unsigned, unused )
__field(unsigned, blocked )
),
TP_fast_assign(
__entry->dev = ca->bdev->bd_dev;
__entry->free = fifo_used(&ca->free[reserve]);
__entry->free_inc = fifo_used(&ca->free_inc);
__entry->unused = fifo_used(&ca->unused);
__entry->blocked = atomic_read(&ca->set->prio_blocked);
),
TP_printk("free %u free_inc %u unused %u blocked %u", __entry->free,
__entry->free_inc, __entry->unused, __entry->blocked)
TP_printk("alloc fail %d,%d free %u free_inc %u blocked %u",
MAJOR(__entry->dev), MINOR(__entry->dev), __entry->free,
__entry->free_inc, __entry->blocked)
);
/* Background writeback */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment