Commit c37511b8 authored by Kent Overstreet

bcache: Fix/revamp tracepoints

The tracepoints were reworked to be more sensible, and a null pointer
deref in one of them was fixed.

Converted some of the pr_debug()s to tracepoints - this is partly a
performance optimization: without DEBUG or CONFIG_DYNAMIC_DEBUG,
pr_debug() used to be an empty macro, but at some point it was changed
to an empty inline function.

Some of the pr_debug() statements had rather expensive function calls as
part of their arguments, so that code was getting run unnecessarily even
on non-debug kernels - in some fast paths, too.
Signed-off-by: Kent Overstreet <koverstreet@google.com>
parent 57943511
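
The performance argument above can be illustrated with a small, self-contained userspace sketch (not bcache code; debug_macro() and debug_inline() are hypothetical stand-ins for the two pr_debug() behaviours described in the message): with an empty macro the argument expressions vanish at preprocessing time, while with an empty inline function they are still evaluated.

/* Hypothetical stand-ins for the two pr_debug() behaviours. */
#include <stdio.h>

static int calls;

static int expensive(void)	/* stands in for fifo_used(), pbtree(), ... */
{
	calls++;
	return 42;
}

/* Old behaviour: an empty macro - the arguments are discarded before compilation. */
#define debug_macro(fmt, ...)	do { } while (0)

/* Newer behaviour: an empty inline function - the arguments are still evaluated. */
static inline void debug_inline(const char *fmt, int v)
{
	(void)fmt;
	(void)v;
}

int main(void)
{
	debug_macro("free %i", expensive());	/* expensive() never runs */
	debug_inline("free %i", expensive());	/* expensive() still runs */
	printf("expensive() was called %d time(s)\n", calls);	/* prints 1 */
	return 0;
}

The converted call sites sidestep this by passing only cheap arguments (typically a struct pointer) to a tracepoint, so any expensive bookkeeping is paid only when the event is actually enabled.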
@@ -65,6 +65,7 @@
 #include <linux/kthread.h>
 #include <linux/random.h>
+#include <trace/events/bcache.h>
 #define MAX_IN_FLIGHT_DISCARDS 8U
@@ -351,10 +352,7 @@ static void invalidate_buckets(struct cache *ca)
 		break;
 	}
-	pr_debug("free %zu/%zu free_inc %zu/%zu unused %zu/%zu",
-		 fifo_used(&ca->free), ca->free.size,
-		 fifo_used(&ca->free_inc), ca->free_inc.size,
-		 fifo_used(&ca->unused), ca->unused.size);
+	trace_bcache_alloc_invalidate(ca);
 }
 #define allocator_wait(ca, cond) \
@@ -473,9 +471,7 @@ long bch_bucket_alloc(struct cache *ca, unsigned watermark, struct closure *cl)
 		return r;
 	}
-	pr_debug("alloc failure: blocked %i free %zu free_inc %zu unused %zu",
-		 atomic_read(&ca->set->prio_blocked), fifo_used(&ca->free),
-		 fifo_used(&ca->free_inc), fifo_used(&ca->unused));
+	trace_bcache_alloc_fail(ca);
 	if (cl) {
 		closure_wait(&ca->set->bucket_wait, cl);
...
@@ -178,7 +178,6 @@
 #define pr_fmt(fmt) "bcache: %s() " fmt "\n", __func__
 #include <linux/bio.h>
-#include <linux/blktrace_api.h>
 #include <linux/kobject.h>
 #include <linux/list.h>
 #include <linux/mutex.h>
@@ -901,8 +900,6 @@ static inline unsigned local_clock_us(void)
 	return local_clock() >> 10;
 }
-#define MAX_BSETS 4U
 #define BTREE_PRIO USHRT_MAX
 #define INITIAL_PRIO 32768
@@ -1107,23 +1104,6 @@ static inline void __bkey_put(struct cache_set *c, struct bkey *k)
 		atomic_dec_bug(&PTR_BUCKET(c, k, i)->pin);
 }
-/* Blktrace macros */
-#define blktrace_msg(c, fmt, ...) \
-do { \
-	struct request_queue *q = bdev_get_queue(c->bdev); \
-	if (q) \
-		blk_add_trace_msg(q, fmt, ##__VA_ARGS__); \
-} while (0)
-#define blktrace_msg_all(s, fmt, ...) \
-do { \
-	struct cache *_c; \
-	unsigned i; \
-	for_each_cache(_c, (s), i) \
-		blktrace_msg(_c, fmt, ##__VA_ARGS__); \
-} while (0)
 static inline void cached_dev_put(struct cached_dev *dc)
 {
 	if (atomic_dec_and_test(&dc->count))
...
 #ifndef _BCACHE_BSET_H
 #define _BCACHE_BSET_H
+#include <linux/slab.h>
 /*
  * BKEYS:
  *
@@ -142,6 +144,8 @@
 /* Btree key comparison/iteration */
+#define MAX_BSETS 4U
 struct btree_iter {
 	size_t size, used;
 	struct btree_iter_set {
...
@@ -223,8 +223,9 @@ void bch_btree_node_read(struct btree *b)
 	struct closure cl;
 	struct bio *bio;
+	trace_bcache_btree_read(b);
 	closure_init_stack(&cl);
-	pr_debug("%s", pbtree(b));
 	bio = bch_bbio_alloc(b->c);
 	bio->bi_rw = REQ_META|READ_SYNC;
@@ -234,7 +235,6 @@ void bch_btree_node_read(struct btree *b)
 	bch_bio_map(bio, b->sets[0].data);
-	trace_bcache_btree_read(bio);
 	bch_submit_bbio(bio, b->c, &b->key, 0);
 	closure_sync(&cl);
@@ -343,7 +343,6 @@ static void do_btree_node_write(struct btree *b)
 			memcpy(page_address(bv->bv_page),
 			       base + j * PAGE_SIZE, PAGE_SIZE);
-		trace_bcache_btree_write(b->bio);
 		bch_submit_bbio(b->bio, b->c, &k.key, 0);
 		continue_at(cl, btree_node_write_done, NULL);
@@ -351,7 +350,6 @@ static void do_btree_node_write(struct btree *b)
 		b->bio->bi_vcnt = 0;
 		bch_bio_map(b->bio, i);
-		trace_bcache_btree_write(b->bio);
 		bch_submit_bbio(b->bio, b->c, &k.key, 0);
 		closure_sync(cl);
@@ -363,10 +361,13 @@ void bch_btree_node_write(struct btree *b, struct closure *parent)
 {
 	struct bset *i = b->sets[b->nsets].data;
+	trace_bcache_btree_write(b);
 	BUG_ON(current->bio_list);
 	BUG_ON(b->written >= btree_blocks(b));
 	BUG_ON(b->written && !i->keys);
 	BUG_ON(b->sets->data->seq != i->seq);
+	bch_check_key_order(b, i);
 	cancel_delayed_work(&b->work);
@@ -376,12 +377,8 @@ void bch_btree_node_write(struct btree *b, struct closure *parent)
 	clear_bit(BTREE_NODE_dirty, &b->flags);
 	change_bit(BTREE_NODE_write_idx, &b->flags);
-	bch_check_key_order(b, i);
 	do_btree_node_write(b);
-	pr_debug("%s block %i keys %i", pbtree(b), b->written, i->keys);
 	b->written += set_blocks(i, b->c);
 	atomic_long_add(set_blocks(i, b->c) * b->c->sb.block_size,
			&PTR_CACHE(b->c, &b->key, 0)->btree_sectors_written);
@@ -752,6 +749,8 @@ static struct btree *mca_cannibalize(struct cache_set *c, struct bkey *k,
 	int ret = -ENOMEM;
 	struct btree *i;
+	trace_bcache_btree_cache_cannibalize(c);
 	if (!cl)
 		return ERR_PTR(-ENOMEM);
@@ -770,7 +769,6 @@ static struct btree *mca_cannibalize(struct cache_set *c, struct bkey *k,
 		return ERR_PTR(-EAGAIN);
 	}
-	/* XXX: tracepoint */
 	c->try_harder = cl;
 	c->try_harder_start = local_clock();
retry:
@@ -956,13 +954,14 @@ static void btree_node_free(struct btree *b, struct btree_op *op)
 {
 	unsigned i;
+	trace_bcache_btree_node_free(b);
 	/*
	 * The BUG_ON() in btree_node_get() implies that we must have a write
	 * lock on parent to free or even invalidate a node
	 */
 	BUG_ON(op->lock <= b->level);
 	BUG_ON(b == b->c->root);
-	pr_debug("bucket %s", pbtree(b));
 	if (btree_node_dirty(b))
 		btree_complete_write(b, btree_current_write(b));
@@ -1012,12 +1011,16 @@ struct btree *bch_btree_node_alloc(struct cache_set *c, int level,
 	bch_bset_init_next(b);
 	mutex_unlock(&c->bucket_lock);
+	trace_bcache_btree_node_alloc(b);
 	return b;
err_free:
 	bch_bucket_free(c, &k.key);
 	__bkey_put(c, &k.key);
err:
 	mutex_unlock(&c->bucket_lock);
+	trace_bcache_btree_node_alloc_fail(b);
 	return b;
 }
@@ -1254,7 +1257,7 @@ static void btree_gc_coalesce(struct btree *b, struct btree_op *op,
 		btree_node_free(r->b, op);
 		up_write(&r->b->lock);
-	pr_debug("coalesced %u nodes", nodes);
+	trace_bcache_btree_gc_coalesce(nodes);
 	gc->nodes--;
 	nodes--;
@@ -1479,8 +1482,7 @@ static void bch_btree_gc(struct closure *cl)
 	struct btree_op op;
 	uint64_t start_time = local_clock();
-	trace_bcache_gc_start(c->sb.set_uuid);
-	blktrace_msg_all(c, "Starting gc");
+	trace_bcache_gc_start(c);
 	memset(&stats, 0, sizeof(struct gc_stat));
 	closure_init_stack(&writes);
@@ -1496,9 +1498,7 @@ static void bch_btree_gc(struct closure *cl)
 	closure_sync(&writes);
 	if (ret) {
-		blktrace_msg_all(c, "Stopped gc");
 		pr_warn("gc failed!");
 		continue_at(cl, bch_btree_gc, bch_gc_wq);
 	}
@@ -1519,8 +1519,7 @@ static void bch_btree_gc(struct closure *cl)
 	stats.in_use = (c->nbuckets - available) * 100 / c->nbuckets;
 	memcpy(&c->gc_stats, &stats, sizeof(struct gc_stat));
-	blktrace_msg_all(c, "Finished gc");
-	trace_bcache_gc_end(c->sb.set_uuid);
+	trace_bcache_gc_end(c);
 	continue_at(cl, bch_moving_gc, bch_gc_wq);
 }
@@ -1901,12 +1900,11 @@ static int btree_split(struct btree *b, struct btree_op *op)
 	split = set_blocks(n1->sets[0].data, n1->c) > (btree_blocks(b) * 4) / 5;
-	pr_debug("%ssplitting at %s keys %i", split ? "" : "not ",
-		 pbtree(b), n1->sets[0].data->keys);
 	if (split) {
 		unsigned keys = 0;
+		trace_bcache_btree_node_split(b, n1->sets[0].data->keys);
 		n2 = bch_btree_node_alloc(b->c, b->level, &op->cl);
 		if (IS_ERR(n2))
			goto err_free1;
@@ -1941,8 +1939,11 @@ static int btree_split(struct btree *b, struct btree_op *op)
 		bch_keylist_add(&op->keys, &n2->key);
 		bch_btree_node_write(n2, &op->cl);
 		rw_unlock(true, n2);
-	} else
+	} else {
+		trace_bcache_btree_node_compact(b, n1->sets[0].data->keys);
 		bch_btree_insert_keys(n1, op);
+	}
 	bch_keylist_add(&op->keys, &n1->key);
 	bch_btree_node_write(n1, &op->cl);
@@ -2117,6 +2118,8 @@ void bch_btree_set_root(struct btree *b)
 {
 	unsigned i;
+	trace_bcache_btree_set_root(b);
 	BUG_ON(!b->written);
 	for (i = 0; i < KEY_PTRS(&b->key); i++)
@@ -2130,7 +2133,6 @@ void bch_btree_set_root(struct btree *b)
 	__bkey_put(b->c, &b->key);
 	bch_journal_meta(b->c, NULL);
-	pr_debug("%s for %pf", pbtree(b), __builtin_return_address(0));
 }
 /* Cache lookup */
@@ -2216,7 +2218,6 @@ static int submit_partial_cache_hit(struct btree *b, struct btree_op *op,
 		n->bi_end_io = bch_cache_read_endio;
 		n->bi_private = &s->cl;
-		trace_bcache_cache_hit(n);
 		__bch_submit_bbio(n, b->c);
 	}
...
@@ -9,6 +9,8 @@
 #include "bset.h"
 #include "debug.h"
+#include <linux/blkdev.h>
 static void bch_bi_idx_hack_endio(struct bio *bio, int error)
 {
 	struct bio *p = bio->bi_private;
...
@@ -9,6 +9,8 @@
 #include "debug.h"
 #include "request.h"
+#include <trace/events/bcache.h>
 /*
  * Journal replay/recovery:
  *
@@ -300,7 +302,8 @@ int bch_journal_replay(struct cache_set *s, struct list_head *list,
 		for (k = i->j.start;
 		     k < end(&i->j);
 		     k = bkey_next(k)) {
-			pr_debug("%s", pkey(k));
+			trace_bcache_journal_replay_key(k);
 			bkey_copy(op->keys.top, k);
 			bch_keylist_push(&op->keys);
@@ -712,7 +715,8 @@ void bch_journal(struct closure *cl)
 	spin_lock(&c->journal.lock);
 	if (journal_full(&c->journal)) {
-		/* XXX: tracepoint */
+		trace_bcache_journal_full(c);
 		closure_wait(&c->journal.wait, cl);
 		journal_reclaim(c);
@@ -728,13 +732,15 @@ void bch_journal(struct closure *cl)
 	if (b * c->sb.block_size > PAGE_SECTORS << JSET_BITS ||
 	    b > c->journal.blocks_free) {
-		/* XXX: If we were inserting so many keys that they won't fit in
+		trace_bcache_journal_entry_full(c);
+		/*
+		 * XXX: If we were inserting so many keys that they won't fit in
		 * an _empty_ journal write, we'll deadlock. For now, handle
		 * this in bch_keylist_realloc() - but something to think about.
		 */
 		BUG_ON(!w->data->keys);
-		/* XXX: tracepoint */
 		BUG_ON(!closure_wait(&w->wait, cl));
 		closure_flush(&c->journal.io);
...
@@ -9,6 +9,8 @@
 #include "debug.h"
 #include "request.h"
+#include <trace/events/bcache.h>
 struct moving_io {
 	struct keybuf_key *w;
 	struct search s;
@@ -49,9 +51,8 @@ static void write_moving_finish(struct closure *cl)
 	while (bv-- != bio->bi_io_vec)
 		__free_page(bv->bv_page);
-	pr_debug("%s %s", io->s.op.insert_collision
-		 ? "collision moving" : "moved",
-		 pkey(&io->w->key));
+	if (io->s.op.insert_collision)
+		trace_bcache_gc_copy_collision(&io->w->key);
 	bch_keybuf_del(&io->s.op.c->moving_gc_keys, io->w);
@@ -94,8 +95,6 @@ static void write_moving(struct closure *cl)
 	struct moving_io *io = container_of(s, struct moving_io, s);
 	if (!s->error) {
-		trace_bcache_write_moving(&io->bio.bio);
 		moving_init(io);
 		io->bio.bio.bi_sector = KEY_START(&io->w->key);
@@ -122,7 +121,6 @@ static void read_moving_submit(struct closure *cl)
 	struct moving_io *io = container_of(s, struct moving_io, s);
 	struct bio *bio = &io->bio.bio;
-	trace_bcache_read_moving(bio);
 	bch_submit_bbio(bio, s->op.c, &io->w->key, 0);
 	continue_at(cl, write_moving, bch_gc_wq);
@@ -162,7 +160,7 @@ static void read_moving(struct closure *cl)
 		if (bch_bio_alloc_pages(bio, GFP_KERNEL))
 			goto err;
-		pr_debug("%s", pkey(&w->key));
+		trace_bcache_gc_copy(&w->key);
 		closure_call(&io->s.cl, read_moving_submit, NULL, &c->gc.cl);
...
@@ -530,10 +530,9 @@ static void bch_insert_data_loop(struct closure *cl)
 		if (KEY_CSUM(k))
 			bio_csum(n, k);
-		pr_debug("%s", pkey(k));
+		trace_bcache_cache_insert(k);
 		bch_keylist_push(&op->keys);
-		trace_bcache_cache_insert(n, n->bi_sector, n->bi_bdev);
 		n->bi_rw |= REQ_WRITE;
 		bch_submit_bbio(n, op->c, k, 0);
 	} while (n != bio);
@@ -784,11 +783,8 @@ static void request_read_error(struct closure *cl)
 	int i;
 	if (s->recoverable) {
-		/* The cache read failed, but we can retry from the backing
-		 * device.
-		 */
-		pr_debug("recovering at sector %llu",
-			 (uint64_t) s->orig_bio->bi_sector);
+		/* Retry from the backing device: */
+		trace_bcache_read_retry(s->orig_bio);
 		s->error = 0;
 		bv = s->bio.bio.bi_io_vec;
@@ -806,7 +802,6 @@ static void request_read_error(struct closure *cl)
 		/* XXX: invalidate cache */
-		trace_bcache_read_retry(&s->bio.bio);
 		closure_bio_submit(&s->bio.bio, &s->cl, s->d);
 	}
@@ -899,6 +894,7 @@ static void request_read_done_bh(struct closure *cl)
 	struct cached_dev *dc = container_of(s->d, struct cached_dev, disk);
 	bch_mark_cache_accounting(s, !s->cache_miss, s->op.skip);
+	trace_bcache_read(s->orig_bio, !s->cache_miss, s->op.skip);
 	if (s->error)
 		continue_at_nobarrier(cl, request_read_error, bcache_wq);
@@ -969,7 +965,6 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
 	s->cache_miss = miss;
 	bio_get(s->op.cache_bio);
-	trace_bcache_cache_miss(s->orig_bio);
 	closure_bio_submit(s->op.cache_bio, &s->cl, s->d);
 	return ret;
@@ -1040,15 +1035,15 @@ static void request_write(struct cached_dev *dc, struct search *s)
 	if (should_writeback(dc, s->orig_bio))
 		s->writeback = true;
+	trace_bcache_write(s->orig_bio, s->writeback, s->op.skip);
 	if (!s->writeback) {
 		s->op.cache_bio = bio_clone_bioset(bio, GFP_NOIO,
						   dc->disk.bio_split);
-		trace_bcache_writethrough(s->orig_bio);
 		closure_bio_submit(bio, cl, s->d);
 	} else {
 		s->op.cache_bio = bio;
-		trace_bcache_writeback(s->orig_bio);
 		bch_writeback_add(dc, bio_sectors(bio));
 	}
out:
@@ -1058,7 +1053,6 @@ static void request_write(struct cached_dev *dc, struct search *s)
 	s->op.skip = true;
 	s->op.cache_bio = s->orig_bio;
 	bio_get(s->op.cache_bio);
-	trace_bcache_write_skip(s->orig_bio);
 	if ((bio->bi_rw & REQ_DISCARD) &&
 	    !blk_queue_discard(bdev_get_queue(dc->bdev)))
@@ -1088,9 +1082,10 @@ static void request_nodata(struct cached_dev *dc, struct search *s)
 /* Cached devices - read & write stuff */
-int bch_get_congested(struct cache_set *c)
+unsigned bch_get_congested(struct cache_set *c)
 {
 	int i;
+	long rand;
 	if (!c->congested_read_threshold_us &&
 	    !c->congested_write_threshold_us)
@@ -1106,7 +1101,13 @@ int bch_get_congested(struct cache_set *c)
 	i += CONGESTED_MAX;
-	return i <= 0 ? 1 : fract_exp_two(i, 6);
+	if (i > 0)
+		i = fract_exp_two(i, 6);
+	rand = get_random_int();
+	i -= bitmap_weight(&rand, BITS_PER_LONG);
+	return i > 0 ? i : 1;
 }
 static void add_sequential(struct task_struct *t)
@@ -1126,10 +1127,8 @@ static void check_should_skip(struct cached_dev *dc, struct search *s)
 {
 	struct cache_set *c = s->op.c;
 	struct bio *bio = &s->bio.bio;
-	long rand;
-	int cutoff = bch_get_congested(c);
 	unsigned mode = cache_mode(dc, bio);
+	unsigned sectors, congested = bch_get_congested(c);
 	if (atomic_read(&dc->disk.detaching) ||
 	    c->gc_stats.in_use > CUTOFF_CACHE_ADD ||
@@ -1147,17 +1146,14 @@ static void check_should_skip(struct cached_dev *dc, struct search *s)
 		goto skip;
 	}
-	if (!cutoff) {
-		cutoff = dc->sequential_cutoff >> 9;
-		if (!cutoff)
-			goto rescale;
-		if (mode == CACHE_MODE_WRITEBACK &&
-		    (bio->bi_rw & REQ_WRITE) &&
-		    (bio->bi_rw & REQ_SYNC))
-			goto rescale;
-	}
+	if (!congested && !dc->sequential_cutoff)
+		goto rescale;
+	if (!congested &&
+	    mode == CACHE_MODE_WRITEBACK &&
+	    (bio->bi_rw & REQ_WRITE) &&
+	    (bio->bi_rw & REQ_SYNC))
+		goto rescale;
 	if (dc->sequential_merge) {
 		struct io *i;
@@ -1192,12 +1188,19 @@ static void check_should_skip(struct cached_dev *dc, struct search *s)
 		add_sequential(s->task);
 	}
-	rand = get_random_int();
-	cutoff -= bitmap_weight(&rand, BITS_PER_LONG);
-	if (cutoff <= (int) (max(s->task->sequential_io,
-				 s->task->sequential_io_avg) >> 9))
+	sectors = max(s->task->sequential_io,
+		      s->task->sequential_io_avg) >> 9;
+	if (dc->sequential_cutoff &&
+	    sectors >= dc->sequential_cutoff >> 9) {
+		trace_bcache_bypass_sequential(s->orig_bio);
 		goto skip;
+	}
+	if (congested && sectors >= congested) {
+		trace_bcache_bypass_congested(s->orig_bio);
+		goto skip;
+	}
rescale:
 	bch_rescale_priorities(c, bio_sectors(bio));
...
@@ -30,7 +30,7 @@ struct search {
 };
 void bch_cache_read_endio(struct bio *, int);
-int bch_get_congested(struct cache_set *);
+unsigned bch_get_congested(struct cache_set *);
 void bch_insert_data(struct closure *cl);
 void bch_btree_insert_async(struct closure *);
 void bch_cache_read_endio(struct bio *, int);
...
@@ -11,6 +11,7 @@
 #include "debug.h"
 #include "request.h"
+#include <linux/blkdev.h>
 #include <linux/buffer_head.h>
 #include <linux/debugfs.h>
 #include <linux/genhd.h>
@@ -543,7 +544,6 @@ void bch_prio_write(struct cache *ca)
 	pr_debug("free %zu, free_inc %zu, unused %zu", fifo_used(&ca->free),
		 fifo_used(&ca->free_inc), fifo_used(&ca->unused));
-	blktrace_msg(ca, "Starting priorities: " buckets_free(ca));
 	for (i = prio_buckets(ca) - 1; i >= 0; --i) {
		long bucket;
...
@@ -10,6 +10,7 @@
 #include "btree.h"
 #include "request.h"
+#include <linux/blkdev.h>
 #include <linux/sort.h>
 static const char * const cache_replacement_policies[] = {
...
@@ -2,6 +2,7 @@
 #include "btree.h"
 #include "request.h"
+#include <linux/blktrace_api.h>
 #include <linux/module.h>
 #define CREATE_TRACE_POINTS
@@ -9,18 +10,42 @@
 EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_request_start);
 EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_request_end);
-EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_passthrough);
-EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_cache_hit);
-EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_cache_miss);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_bypass_sequential);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_bypass_congested);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_read);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_write);
 EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_read_retry);
-EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_writethrough);
-EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_writeback);
-EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_write_skip);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_cache_insert);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_journal_replay_key);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_journal_write);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_journal_full);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_journal_entry_full);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_cache_cannibalize);
 EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_read);
 EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_write);
-EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_write_dirty);
-EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_read_dirty);
-EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_journal_write);
-EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_cache_insert);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_node_alloc);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_node_alloc_fail);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_node_free);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_gc_coalesce);
 EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_gc_start);
 EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_gc_end);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_gc_copy);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_gc_copy_collision);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_node_split);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_node_compact);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_set_root);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_alloc_invalidate);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_alloc_fail);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_writeback);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_writeback_collision);
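
The events exported above are declared in include/trace/events/bcache.h (not shown in this view). As a rough sketch of the general TRACE_EVENT() pattern only - the field choices below are illustrative assumptions, not the commit's actual definitions:

/*
 * Illustrative sketch, not the real bcache definitions: a TRACE_EVENT()
 * like this lives in a trace header (include/trace/events/bcache.h) and
 * is instantiated by trace.c via CREATE_TRACE_POINTS.
 */
TRACE_EVENT(bcache_alloc_fail,
	TP_PROTO(struct cache *ca),
	TP_ARGS(ca),

	TP_STRUCT__entry(
		__field(unsigned,	free	)
		__field(unsigned,	free_inc)
	),

	TP_fast_assign(
		/* Only runs when the event is enabled. */
		__entry->free		= fifo_used(&ca->free);
		__entry->free_inc	= fifo_used(&ca->free_inc);
	),

	TP_printk("free %u free_inc %u", __entry->free, __entry->free_inc)
);

A call site such as trace_bcache_alloc_fail(ca) then compiles down to a static-branch-guarded inline, and the events can typically be enabled at runtime under /sys/kernel/debug/tracing/events/bcache/.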
@@ -15,8 +15,6 @@
 struct closure;
-#include <trace/events/bcache.h>
 #ifdef CONFIG_BCACHE_EDEBUG
 #define atomic_dec_bug(v) BUG_ON(atomic_dec_return(v) < 0)
...
@@ -10,6 +10,8 @@
 #include "btree.h"
 #include "debug.h"
+#include <trace/events/bcache.h>
 static struct workqueue_struct *dirty_wq;
 static void read_dirty(struct closure *);
@@ -236,10 +238,12 @@ static void write_dirty_finish(struct closure *cl)
 		for (i = 0; i < KEY_PTRS(&w->key); i++)
 			atomic_inc(&PTR_BUCKET(dc->disk.c, &w->key, i)->pin);
-	pr_debug("clearing %s", pkey(&w->key));
 	bch_btree_insert(&op, dc->disk.c);
 	closure_sync(&op.cl);
+	if (op.insert_collision)
+		trace_bcache_writeback_collision(&w->key);
 	atomic_long_inc(op.insert_collision
			? &dc->disk.c->writeback_keys_failed
			: &dc->disk.c->writeback_keys_done);
@@ -275,7 +279,6 @@ static void write_dirty(struct closure *cl)
 	io->bio.bi_bdev = io->dc->bdev;
 	io->bio.bi_end_io = dirty_endio;
-	trace_bcache_write_dirty(&io->bio);
 	closure_bio_submit(&io->bio, cl, &io->dc->disk);
 	continue_at(cl, write_dirty_finish, dirty_wq);
@@ -296,7 +299,6 @@ static void read_dirty_submit(struct closure *cl)
 {
 	struct dirty_io *io = container_of(cl, struct dirty_io, cl);
-	trace_bcache_read_dirty(&io->bio);
 	closure_bio_submit(&io->bio, cl, &io->dc->disk);
 	continue_at(cl, write_dirty, dirty_wq);
@@ -352,7 +354,7 @@ static void read_dirty(struct closure *cl)
 		if (bch_bio_alloc_pages(&io->bio, GFP_KERNEL))
			goto err_free;
-		pr_debug("%s", pkey(&w->key));
+		trace_bcache_writeback(&w->key);
 		closure_call(&io->cl, read_dirty_submit, NULL, &dc->disk.cl);
...
This diff is collapsed.