Commit 53d8ab29 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'for-3.14/drivers' of git://git.kernel.dk/linux-block

Pull block IO driver changes from Jens Axboe:

 - bcache update from Kent Overstreet.

 - two bcache fixes from Nicholas Swenson.

 - cciss pci init error fix from Andrew.

 - underflow fix in the parallel IDE pg_write code from Dan Carpenter.
   I'm sure the 1 (or 0) users of that are now happy.

 - two PCI related fixes for sx8 from Jingoo Han.

 - floppy init fix for first block read from Jiri Kosina.

 - pktcdvd error return miss fix from Julia Lawall.

 - removal of IRQF_SHARED from the SEGA Dreamcast CD-ROM code from
   Michael Opdenacker.

 - comment typo fix for the loop driver from Olaf Hering.

 - potential oops fix for null_blk from Raghavendra K T.

 - two fixes from Sam Bradshaw (Micron) for the mtip32xx driver, fixing
   an OOM problem and a problem with handling security locked conditions

* 'for-3.14/drivers' of git://git.kernel.dk/linux-block: (47 commits)
  mg_disk: Spelling s/finised/finished/
  null_blk: Null pointer deference problem in alloc_page_buffers
  mtip32xx: Correctly handle security locked condition
  mtip32xx: Make SGL container per-command to eliminate high order dma allocation
  drivers/block/loop.c: fix comment typo in loop_config_discard
  drivers/block/cciss.c:cciss_init_one(): use proper errnos
  drivers/block/paride/pg.c: underflow bug in pg_write()
  drivers/block/sx8.c: remove unnecessary pci_set_drvdata()
  drivers/block/sx8.c: use module_pci_driver()
  floppy: bail out in open() if drive is not responding to block0 read
  bcache: Fix auxiliary search trees for key size > cacheline size
  bcache: Don't return -EINTR when insert finished
  bcache: Improve bucket_prio() calculation
  bcache: Add bch_bkey_equal_header()
  bcache: update bch_bkey_try_merge
  bcache: Move insert_fixup() to btree_keys_ops
  bcache: Convert sorting to btree_keys
  bcache: Convert debug code to btree_keys
  bcache: Convert btree_iter to struct btree_keys
  bcache: Refactor bset_tree sysfs stats
  ...
parents f568849e 14424be4
......@@ -592,6 +592,10 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
ret = -1;
}
t->raid_partial_stripes_expensive =
max(t->raid_partial_stripes_expensive,
b->raid_partial_stripes_expensive);
/* Find lowest common alignment_offset */
t->alignment_offset = lcm(t->alignment_offset, alignment)
& (max(t->physical_block_size, t->io_min) - 1);
......
......@@ -5004,7 +5004,7 @@ static int cciss_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
i = alloc_cciss_hba(pdev);
if (i < 0)
return -1;
return -ENOMEM;
h = hba[i];
h->pdev = pdev;
......@@ -5205,7 +5205,7 @@ static int cciss_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
*/
pci_set_drvdata(pdev, NULL);
free_hba(h);
return -1;
return -ENODEV;
}
static void cciss_shutdown(struct pci_dev *pdev)
......
......@@ -3691,9 +3691,12 @@ static int floppy_open(struct block_device *bdev, fmode_t mode)
if (!(mode & FMODE_NDELAY)) {
if (mode & (FMODE_READ|FMODE_WRITE)) {
UDRS->last_checked = 0;
clear_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags);
check_disk_change(bdev);
if (test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags))
goto out;
if (test_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags))
goto out;
}
res = -EROFS;
if ((mode & FMODE_WRITE) &&
......@@ -3746,17 +3749,29 @@ static unsigned int floppy_check_events(struct gendisk *disk,
* a disk in the drive, and whether that disk is writable.
*/
static void floppy_rb0_complete(struct bio *bio, int err)
struct rb0_cbdata {
int drive;
struct completion complete;
};
static void floppy_rb0_cb(struct bio *bio, int err)
{
complete((struct completion *)bio->bi_private);
struct rb0_cbdata *cbdata = (struct rb0_cbdata *)bio->bi_private;
int drive = cbdata->drive;
if (err) {
pr_info("floppy: error %d while reading block 0", err);
set_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags);
}
complete(&cbdata->complete);
}
static int __floppy_read_block_0(struct block_device *bdev)
static int __floppy_read_block_0(struct block_device *bdev, int drive)
{
struct bio bio;
struct bio_vec bio_vec;
struct completion complete;
struct page *page;
struct rb0_cbdata cbdata;
size_t size;
page = alloc_page(GFP_NOIO);
......@@ -3769,6 +3784,8 @@ static int __floppy_read_block_0(struct block_device *bdev)
if (!size)
size = 1024;
cbdata.drive = drive;
bio_init(&bio);
bio.bi_io_vec = &bio_vec;
bio_vec.bv_page = page;
......@@ -3779,13 +3796,14 @@ static int __floppy_read_block_0(struct block_device *bdev)
bio.bi_bdev = bdev;
bio.bi_iter.bi_sector = 0;
bio.bi_flags = (1 << BIO_QUIET);
init_completion(&complete);
bio.bi_private = &complete;
bio.bi_end_io = floppy_rb0_complete;
bio.bi_private = &cbdata;
bio.bi_end_io = floppy_rb0_cb;
submit_bio(READ, &bio);
process_fd_request();
wait_for_completion(&complete);
init_completion(&cbdata.complete);
wait_for_completion(&cbdata.complete);
__free_page(page);
......@@ -3827,7 +3845,7 @@ static int floppy_revalidate(struct gendisk *disk)
UDRS->generation++;
if (drive_no_geom(drive)) {
/* auto-sensing */
res = __floppy_read_block_0(opened_bdev[drive]);
res = __floppy_read_block_0(opened_bdev[drive], drive);
} else {
if (cf)
poll_drive(false, FD_RAW_NEED_DISK);
......
......@@ -799,7 +799,7 @@ static void loop_config_discard(struct loop_device *lo)
/*
* We use punch hole to reclaim the free space used by the
* image a.k.a. discard. However we do support discard if
* image a.k.a. discard. However we do not support discard if
* encryption is enabled, because it may give an attacker
* useful information.
*/
......
......@@ -915,7 +915,7 @@ static int mg_probe(struct platform_device *plat_dev)
/* disk reset */
if (prv_data->dev_attr == MG_STORAGE_DEV) {
/* If POR seq. not yet finised, wait */
/* If POR seq. not yet finished, wait */
err = mg_wait_rstout(host->rstout, MG_TMAX_RSTOUT);
if (err)
goto probe_err_3b;
......
This diff is collapsed.
......@@ -69,7 +69,7 @@
* Maximum number of scatter gather entries
* a single command may have.
*/
#define MTIP_MAX_SG 128
#define MTIP_MAX_SG 504
/*
* Maximum number of slot groups (Command Issue & s_active registers)
......@@ -92,7 +92,7 @@
/* Driver name and version strings */
#define MTIP_DRV_NAME "mtip32xx"
#define MTIP_DRV_VERSION "1.2.6os3"
#define MTIP_DRV_VERSION "1.3.0"
/* Maximum number of minor device numbers per device. */
#define MTIP_MAX_MINORS 16
......@@ -391,15 +391,13 @@ struct mtip_port {
*/
dma_addr_t rxfis_dma;
/*
* Pointer to the beginning of the command table memory as used
* by the driver.
* Pointer to the DMA region for RX Fis, Identify, RLE10, and SMART
*/
void *command_table;
void *block1;
/*
* Pointer to the beginning of the command table memory as used
* by the DMA.
* DMA address of region for RX Fis, Identify, RLE10, and SMART
*/
dma_addr_t command_tbl_dma;
dma_addr_t block1_dma;
/*
* Pointer to the beginning of the identify data memory as used
* by the driver.
......
......@@ -616,6 +616,11 @@ static int __init null_init(void)
irqmode = NULL_IRQ_NONE;
}
#endif
if (bs > PAGE_SIZE) {
pr_warn("null_blk: invalid block size\n");
pr_warn("null_blk: defaults block size to %lu\n", PAGE_SIZE);
bs = PAGE_SIZE;
}
if (queue_mode == NULL_Q_MQ && use_per_node_hctx) {
if (submit_queues < nr_online_nodes) {
......
......@@ -581,7 +581,7 @@ static ssize_t pg_write(struct file *filp, const char __user *buf, size_t count,
if (hdr.magic != PG_MAGIC)
return -EINVAL;
if (hdr.dlen > PG_MAX_DATA)
if (hdr.dlen < 0 || hdr.dlen > PG_MAX_DATA)
return -EINVAL;
if ((count - hs) > PG_MAX_DATA)
return -EINVAL;
......
......@@ -706,7 +706,9 @@ static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command *
WRITE : READ, __GFP_WAIT);
if (cgc->buflen) {
if (blk_rq_map_kern(q, rq, cgc->buffer, cgc->buflen, __GFP_WAIT))
ret = blk_rq_map_kern(q, rq, cgc->buffer, cgc->buflen,
__GFP_WAIT);
if (ret)
goto out;
}
......
......@@ -1744,20 +1744,6 @@ static void carm_remove_one (struct pci_dev *pdev)
kfree(host);
pci_release_regions(pdev);
pci_disable_device(pdev);
pci_set_drvdata(pdev, NULL);
}
static int __init carm_init(void)
{
return pci_register_driver(&carm_driver);
}
static void __exit carm_exit(void)
{
pci_unregister_driver(&carm_driver);
}
module_init(carm_init);
module_exit(carm_exit);
module_pci_driver(carm_driver);
......@@ -561,11 +561,11 @@ static int gdrom_set_interrupt_handlers(void)
int err;
err = request_irq(HW_EVENT_GDROM_CMD, gdrom_command_interrupt,
IRQF_DISABLED, "gdrom_command", &gd);
0, "gdrom_command", &gd);
if (err)
return err;
err = request_irq(HW_EVENT_GDROM_DMA, gdrom_dma_interrupt,
IRQF_DISABLED, "gdrom_dma", &gd);
0, "gdrom_dma", &gd);
if (err)
free_irq(HW_EVENT_GDROM_CMD, &gd);
return err;
......
obj-$(CONFIG_BCACHE) += bcache.o
bcache-y := alloc.o btree.o bset.o io.o journal.o writeback.o\
movinggc.o request.o super.o sysfs.o debug.o util.o trace.o stats.o closure.o
bcache-y := alloc.o bset.o btree.o closure.o debug.o extents.o\
io.o journal.o movinggc.o request.o stats.o super.o sysfs.o trace.o\
util.o writeback.o
CFLAGS_request.o += -Iblock
......@@ -132,10 +132,16 @@ bool bch_bucket_add_unused(struct cache *ca, struct bucket *b)
{
BUG_ON(GC_MARK(b) || GC_SECTORS_USED(b));
if (fifo_used(&ca->free) > ca->watermark[WATERMARK_MOVINGGC] &&
CACHE_REPLACEMENT(&ca->sb) == CACHE_REPLACEMENT_FIFO)
return false;
if (CACHE_REPLACEMENT(&ca->sb) == CACHE_REPLACEMENT_FIFO) {
unsigned i;
for (i = 0; i < RESERVE_NONE; i++)
if (!fifo_full(&ca->free[i]))
goto add;
return false;
}
add:
b->prio = 0;
if (can_inc_bucket_gen(b) &&
......@@ -162,8 +168,21 @@ static void invalidate_one_bucket(struct cache *ca, struct bucket *b)
fifo_push(&ca->free_inc, b - ca->buckets);
}
/*
* Determines what order we're going to reuse buckets, smallest bucket_prio()
* first: we also take into account the number of sectors of live data in that
* bucket, and in order for that multiply to make sense we have to scale bucket
*
* Thus, we scale the bucket priorities so that the bucket with the smallest
* prio is worth 1/8th of what INITIAL_PRIO is worth.
*/
#define bucket_prio(b) \
(((unsigned) (b->prio - ca->set->min_prio)) * GC_SECTORS_USED(b))
({ \
unsigned min_prio = (INITIAL_PRIO - ca->set->min_prio) / 8; \
\
(b->prio - ca->set->min_prio + min_prio) * GC_SECTORS_USED(b); \
})
#define bucket_max_cmp(l, r) (bucket_prio(l) < bucket_prio(r))
#define bucket_min_cmp(l, r) (bucket_prio(l) > bucket_prio(r))
......@@ -304,6 +323,21 @@ do { \
__set_current_state(TASK_RUNNING); \
} while (0)
static int bch_allocator_push(struct cache *ca, long bucket)
{
unsigned i;
/* Prios/gens are actually the most important reserve */
if (fifo_push(&ca->free[RESERVE_PRIO], bucket))
return true;
for (i = 0; i < RESERVE_NR; i++)
if (fifo_push(&ca->free[i], bucket))
return true;
return false;
}
static int bch_allocator_thread(void *arg)
{
struct cache *ca = arg;
......@@ -336,9 +370,7 @@ static int bch_allocator_thread(void *arg)
mutex_lock(&ca->set->bucket_lock);
}
allocator_wait(ca, !fifo_full(&ca->free));
fifo_push(&ca->free, bucket);
allocator_wait(ca, bch_allocator_push(ca, bucket));
wake_up(&ca->set->bucket_wait);
}
......@@ -365,34 +397,29 @@ static int bch_allocator_thread(void *arg)
}
}
long bch_bucket_alloc(struct cache *ca, unsigned watermark, bool wait)
long bch_bucket_alloc(struct cache *ca, unsigned reserve, bool wait)
{
DEFINE_WAIT(w);
struct bucket *b;
long r;
/* fastpath */
if (fifo_used(&ca->free) > ca->watermark[watermark]) {
fifo_pop(&ca->free, r);
if (fifo_pop(&ca->free[RESERVE_NONE], r) ||
fifo_pop(&ca->free[reserve], r))
goto out;
}
if (!wait)
return -1;
while (1) {
if (fifo_used(&ca->free) > ca->watermark[watermark]) {
fifo_pop(&ca->free, r);
break;
}
do {
prepare_to_wait(&ca->set->bucket_wait, &w,
TASK_UNINTERRUPTIBLE);
mutex_unlock(&ca->set->bucket_lock);
schedule();
mutex_lock(&ca->set->bucket_lock);
}
} while (!fifo_pop(&ca->free[RESERVE_NONE], r) &&
!fifo_pop(&ca->free[reserve], r));
finish_wait(&ca->set->bucket_wait, &w);
out:
......@@ -401,11 +428,13 @@ long bch_bucket_alloc(struct cache *ca, unsigned watermark, bool wait)
if (expensive_debug_checks(ca->set)) {
size_t iter;
long i;
unsigned j;
for (iter = 0; iter < prio_buckets(ca) * 2; iter++)
BUG_ON(ca->prio_buckets[iter] == (uint64_t) r);
fifo_for_each(i, &ca->free, iter)
for (j = 0; j < RESERVE_NR; j++)
fifo_for_each(i, &ca->free[j], iter)
BUG_ON(i == r);
fifo_for_each(i, &ca->free_inc, iter)
BUG_ON(i == r);
......@@ -419,7 +448,7 @@ long bch_bucket_alloc(struct cache *ca, unsigned watermark, bool wait)
SET_GC_SECTORS_USED(b, ca->sb.bucket_size);
if (watermark <= WATERMARK_METADATA) {
if (reserve <= RESERVE_PRIO) {
SET_GC_MARK(b, GC_MARK_METADATA);
SET_GC_MOVE(b, 0);
b->prio = BTREE_PRIO;
......@@ -445,7 +474,7 @@ void bch_bucket_free(struct cache_set *c, struct bkey *k)
}
}
int __bch_bucket_alloc_set(struct cache_set *c, unsigned watermark,
int __bch_bucket_alloc_set(struct cache_set *c, unsigned reserve,
struct bkey *k, int n, bool wait)
{
int i;
......@@ -459,7 +488,7 @@ int __bch_bucket_alloc_set(struct cache_set *c, unsigned watermark,
for (i = 0; i < n; i++) {
struct cache *ca = c->cache_by_alloc[i];
long b = bch_bucket_alloc(ca, watermark, wait);
long b = bch_bucket_alloc(ca, reserve, wait);
if (b == -1)
goto err;
......@@ -478,12 +507,12 @@ int __bch_bucket_alloc_set(struct cache_set *c, unsigned watermark,
return -1;
}
int bch_bucket_alloc_set(struct cache_set *c, unsigned watermark,
int bch_bucket_alloc_set(struct cache_set *c, unsigned reserve,
struct bkey *k, int n, bool wait)
{
int ret;
mutex_lock(&c->bucket_lock);
ret = __bch_bucket_alloc_set(c, watermark, k, n, wait);
ret = __bch_bucket_alloc_set(c, reserve, k, n, wait);
mutex_unlock(&c->bucket_lock);
return ret;
}
......@@ -573,8 +602,8 @@ bool bch_alloc_sectors(struct cache_set *c, struct bkey *k, unsigned sectors,
while (!(b = pick_data_bucket(c, k, write_point, &alloc.key))) {
unsigned watermark = write_prio
? WATERMARK_MOVINGGC
: WATERMARK_NONE;
? RESERVE_MOVINGGC
: RESERVE_NONE;
spin_unlock(&c->data_bucket_lock);
......@@ -689,7 +718,7 @@ int bch_cache_allocator_init(struct cache *ca)
* Then 8 for btree allocations
* Then half for the moving garbage collector
*/
#if 0
ca->watermark[WATERMARK_PRIO] = 0;
ca->watermark[WATERMARK_METADATA] = prio_buckets(ca);
......@@ -699,6 +728,6 @@ int bch_cache_allocator_init(struct cache *ca)
ca->watermark[WATERMARK_NONE] = ca->free.size / 2 +
ca->watermark[WATERMARK_MOVINGGC];
#endif
return 0;
}
......@@ -187,6 +187,7 @@
#include <linux/types.h>
#include <linux/workqueue.h>
#include "bset.h"
#include "util.h"
#include "closure.h"
......@@ -309,7 +310,8 @@ struct cached_dev {
struct cache_sb sb;
struct bio sb_bio;
struct bio_vec sb_bv[1];
struct closure_with_waitlist sb_write;
struct closure sb_write;
struct semaphore sb_write_mutex;
/* Refcount on the cache set. Always nonzero when we're caching. */
atomic_t count;
......@@ -382,12 +384,12 @@ struct cached_dev {
unsigned writeback_rate_p_term_inverse;
};
enum alloc_watermarks {
WATERMARK_PRIO,
WATERMARK_METADATA,
WATERMARK_MOVINGGC,
WATERMARK_NONE,
WATERMARK_MAX
enum alloc_reserve {
RESERVE_BTREE,
RESERVE_PRIO,
RESERVE_MOVINGGC,
RESERVE_NONE,
RESERVE_NR,
};
struct cache {
......@@ -399,8 +401,6 @@ struct cache {
struct kobject kobj;
struct block_device *bdev;
unsigned watermark[WATERMARK_MAX];
struct task_struct *alloc_thread;
struct closure prio;
......@@ -429,7 +429,7 @@ struct cache {
* because all the data they contained was overwritten), so we only
* need to discard them before they can be moved to the free list.
*/
DECLARE_FIFO(long, free);
DECLARE_FIFO(long, free)[RESERVE_NR];
DECLARE_FIFO(long, free_inc);
DECLARE_FIFO(long, unused);
......@@ -514,7 +514,8 @@ struct cache_set {
uint64_t cached_dev_sectors;
struct closure caching;
struct closure_with_waitlist sb_write;
struct closure sb_write;
struct semaphore sb_write_mutex;
mempool_t *search;
mempool_t *bio_meta;
......@@ -629,13 +630,15 @@ struct cache_set {
#ifdef CONFIG_BCACHE_DEBUG
struct btree *verify_data;
struct bset *verify_ondisk;
struct mutex verify_lock;
#endif
unsigned nr_uuids;
struct uuid_entry *uuids;
BKEY_PADDED(uuid_bucket);
struct closure_with_waitlist uuid_write;
struct closure uuid_write;
struct semaphore uuid_write_mutex;
/*
* A btree node on disk could have too many bsets for an iterator to fit
......@@ -643,13 +646,7 @@ struct cache_set {
*/
mempool_t *fill_iter;
/*
* btree_sort() is a merge sort and requires temporary space - single
* element mempool
*/
struct mutex sort_lock;
struct bset *sort;
unsigned sort_crit_factor;
struct bset_sort_state sort;
/* List of buckets we're currently writing data to */
struct list_head data_buckets;
......@@ -665,7 +662,6 @@ struct cache_set {
unsigned congested_read_threshold_us;
unsigned congested_write_threshold_us;
struct time_stats sort_time;
struct time_stats btree_gc_time;
struct time_stats btree_split_time;
struct time_stats btree_read_time;
......@@ -683,9 +679,9 @@ struct cache_set {
unsigned error_decay;
unsigned short journal_delay_ms;
bool expensive_debug_checks;
unsigned verify:1;
unsigned key_merging_disabled:1;
unsigned expensive_debug_checks:1;
unsigned gc_always_rewrite:1;
unsigned shrinker_disabled:1;
unsigned copy_gc_enabled:1;
......@@ -707,13 +703,8 @@ struct bbio {
struct bio bio;
};
static inline unsigned local_clock_us(void)
{
return local_clock() >> 10;
}
#define BTREE_PRIO USHRT_MAX
#define INITIAL_PRIO 32768
#define INITIAL_PRIO 32768U
#define btree_bytes(c) ((c)->btree_pages * PAGE_SIZE)
#define btree_blocks(b) \
......@@ -726,21 +717,6 @@ static inline unsigned local_clock_us(void)
#define bucket_bytes(c) ((c)->sb.bucket_size << 9)
#define block_bytes(c) ((c)->sb.block_size << 9)
#define __set_bytes(i, k) (sizeof(*(i)) + (k) * sizeof(uint64_t))
#define set_bytes(i) __set_bytes(i, i->keys)
#define __set_blocks(i, k, c) DIV_ROUND_UP(__set_bytes(i, k), block_bytes(c))
#define set_blocks(i, c) __set_blocks(i, (i)->keys, c)
#define node(i, j) ((struct bkey *) ((i)->d + (j)))
#define end(i) node(i, (i)->keys)
#define index(i, b) \
((size_t) (((void *) i - (void *) (b)->sets[0].data) / \
block_bytes(b->c)))
#define btree_data_space(b) (PAGE_SIZE << (b)->page_order)
#define prios_per_bucket(c) \
((bucket_bytes(c) - sizeof(struct prio_set)) / \
sizeof(struct bucket_disk))
......@@ -783,20 +759,34 @@ static inline struct bucket *PTR_BUCKET(struct cache_set *c,
return PTR_CACHE(c, k, ptr)->buckets + PTR_BUCKET_NR(c, k, ptr);
}
/* Btree key macros */
static inline uint8_t gen_after(uint8_t a, uint8_t b)
{
uint8_t r = a - b;
return r > 128U ? 0 : r;
}
static inline void bkey_init(struct bkey *k)
static inline uint8_t ptr_stale(struct cache_set *c, const struct bkey *k,
unsigned i)
{
*k = ZERO_KEY;
return gen_after(PTR_BUCKET(c, k, i)->gen, PTR_GEN(k, i));
}
static inline bool ptr_available(struct cache_set *c, const struct bkey *k,
unsigned i)
{
return (PTR_DEV(k, i) < MAX_CACHES_PER_SET) && PTR_CACHE(c, k, i);
}
/* Btree key macros */
/*
* This is used for various on disk data structures - cache_sb, prio_set, bset,
* jset: The checksum is _always_ the first 8 bytes of these structs
*/
#define csum_set(i) \
bch_crc64(((void *) (i)) + sizeof(uint64_t), \
((void *) end(i)) - (((void *) (i)) + sizeof(uint64_t)))
((void *) bset_bkey_last(i)) - \
(((void *) (i)) + sizeof(uint64_t)))
/* Error handling macros */
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -130,20 +130,12 @@ struct btree {
unsigned long flags;
uint16_t written; /* would be nice to kill */
uint8_t level;
uint8_t nsets;
uint8_t page_order;
/*
* Set of sorted keys - the real btree node - plus a binary search tree
*
* sets[0] is special; set[0]->tree, set[0]->prev and set[0]->data point
* to the memory we have allocated for this btree node. Additionally,
* set[0]->data points to the entire btree node as it exists on disk.
*/
struct bset_tree sets[MAX_BSETS];
struct btree_keys keys;
/* For outstanding btree writes, used as a lock - protects write_idx */
struct closure_with_waitlist io;
struct closure io;
struct semaphore io_mutex;
struct list_head list;
struct delayed_work work;
......@@ -179,24 +171,19 @@ static inline struct btree_write *btree_prev_write(struct btree *b)
return b->writes + (btree_node_write_idx(b) ^ 1);
}
static inline unsigned bset_offset(struct btree *b, struct bset *i)
static inline struct bset *btree_bset_first(struct btree *b)
{
return (((size_t) i) - ((size_t) b->sets->data)) >> 9;
return b->keys.set->data;
}
static inline struct bset *write_block(struct btree *b)
static inline struct bset *btree_bset_last(struct btree *b)
{
return ((void *) b->sets[0].data) + b->written * block_bytes(b->c);
return bset_tree_last(&b->keys)->data;
}
static inline bool bset_written(struct btree *b, struct bset_tree *t)
static inline unsigned bset_block_offset(struct btree *b, struct bset *i)
{
return t->data < write_block(b);
}
static inline bool bkey_written(struct btree *b, struct bkey *k)
{
return k < write_block(b)->start;
return bset_sector_offset(&b->keys, i) >> b->c->block_bits;
}
static inline void set_gc_sectors(struct cache_set *c)
......@@ -204,21 +191,6 @@ static inline void set_gc_sectors(struct cache_set *c)
atomic_set(&c->sectors_to_gc, c->sb.bucket_size * c->nbuckets / 16);
}
static inline struct bkey *bch_btree_iter_init(struct btree *b,
struct btree_iter *iter,
struct bkey *search)
{
return __bch_btree_iter_init(b, iter, search, b->sets);
}
static inline bool bch_ptr_invalid(struct btree *b, const struct bkey *k)
{
if (b->level)
return bch_btree_ptr_invalid(b->c, k);
else
return bch_extent_ptr_invalid(b->c, k);
}
void bkey_put(struct cache_set *c, struct bkey *k);
/* Looping macros */
......@@ -229,17 +201,12 @@ void bkey_put(struct cache_set *c, struct bkey *k);
iter++) \
hlist_for_each_entry_rcu((b), (c)->bucket_hash + iter, hash)
#define for_each_key_filter(b, k, iter, filter) \
for (bch_btree_iter_init((b), (iter), NULL); \
((k) = bch_btree_iter_next_filter((iter), b, filter));)
#define for_each_key(b, k, iter) \
for (bch_btree_iter_init((b), (iter), NULL); \
((k) = bch_btree_iter_next(iter));)
/* Recursing down the btree */
struct btree_op {
/* for waiting on btree reserve in btree_split() */
wait_queue_t wait;
/* Btree level at which we start taking write locks */
short lock;
......@@ -249,6 +216,7 @@ struct btree_op {
static inline void bch_btree_op_init(struct btree_op *op, int write_lock_level)
{
memset(op, 0, sizeof(struct btree_op));
init_wait(&op->wait);
op->lock = write_lock_level;
}
......@@ -267,7 +235,7 @@ static inline void rw_unlock(bool w, struct btree *b)
(w ? up_write : up_read)(&b->lock);
}
void bch_btree_node_read(struct btree *);
void bch_btree_node_read_done(struct btree *);
void bch_btree_node_write(struct btree *, struct closure *);
void bch_btree_set_root(struct btree *);
......
......@@ -11,19 +11,6 @@
#include "closure.h"
#define CL_FIELD(type, field) \
case TYPE_ ## type: \
return &container_of(cl, struct type, cl)->field
static struct closure_waitlist *closure_waitlist(struct closure *cl)
{
switch (cl->type) {
CL_FIELD(closure_with_waitlist, wait);
default:
return NULL;
}
}
static inline void closure_put_after_sub(struct closure *cl, int flags)
{
int r = flags & CLOSURE_REMAINING_MASK;
......@@ -42,17 +29,10 @@ static inline void closure_put_after_sub(struct closure *cl, int flags)
closure_queue(cl);
} else {
struct closure *parent = cl->parent;
struct closure_waitlist *wait = closure_waitlist(cl);
closure_fn *destructor = cl->fn;
closure_debug_destroy(cl);
smp_mb();
atomic_set(&cl->remaining, -1);
if (wait)
closure_wake_up(wait);
if (destructor)
destructor(cl);
......@@ -69,19 +49,18 @@ void closure_sub(struct closure *cl, int v)
}
EXPORT_SYMBOL(closure_sub);
/**
* closure_put - decrement a closure's refcount
*/
void closure_put(struct closure *cl)
{
closure_put_after_sub(cl, atomic_dec_return(&cl->remaining));
}
EXPORT_SYMBOL(closure_put);
static void set_waiting(struct closure *cl, unsigned long f)
{
#ifdef CONFIG_BCACHE_CLOSURES_DEBUG
cl->waiting_on = f;
#endif
}
/**
* closure_wake_up - wake up all closures on a wait list, without memory barrier
*/
void __closure_wake_up(struct closure_waitlist *wait_list)
{
struct llist_node *list;
......@@ -106,27 +85,34 @@ void __closure_wake_up(struct closure_waitlist *wait_list)
cl = container_of(reverse, struct closure, list);
reverse = llist_next(reverse);
set_waiting(cl, 0);
closure_set_waiting(cl, 0);
closure_sub(cl, CLOSURE_WAITING + 1);
}
}
EXPORT_SYMBOL(__closure_wake_up);
bool closure_wait(struct closure_waitlist *list, struct closure *cl)
/**
* closure_wait - add a closure to a waitlist
*
* @waitlist will own a ref on @cl, which will be released when
* closure_wake_up() is called on @waitlist.
*
*/
bool closure_wait(struct closure_waitlist *waitlist, struct closure *cl)
{
if (atomic_read(&cl->remaining) & CLOSURE_WAITING)
return false;
set_waiting(cl, _RET_IP_);
closure_set_waiting(cl, _RET_IP_);
atomic_add(CLOSURE_WAITING + 1, &cl->remaining);
llist_add(&cl->list, &list->list);
llist_add(&cl->list, &waitlist->list);
return true;
}
EXPORT_SYMBOL(closure_wait);
/**
* closure_sync() - sleep until a closure a closure has nothing left to wait on
* closure_sync - sleep until a closure a closure has nothing left to wait on
*
* Sleeps until the refcount hits 1 - the thread that's running the closure owns
* the last refcount.
......@@ -148,46 +134,6 @@ void closure_sync(struct closure *cl)
}
EXPORT_SYMBOL(closure_sync);
/**
* closure_trylock() - try to acquire the closure, without waiting
* @cl: closure to lock
*
* Returns true if the closure was succesfully locked.
*/
bool closure_trylock(struct closure *cl, struct closure *parent)
{
if (atomic_cmpxchg(&cl->remaining, -1,
CLOSURE_REMAINING_INITIALIZER) != -1)
return false;
smp_mb();
cl->parent = parent;
if (parent)
closure_get(parent);
closure_set_ret_ip(cl);
closure_debug_create(cl);
return true;
}
EXPORT_SYMBOL(closure_trylock);
void __closure_lock(struct closure *cl, struct closure *parent,
struct closure_waitlist *wait_list)
{
struct closure wait;
closure_init_stack(&wait);
while (1) {
if (closure_trylock(cl, parent))
return;
closure_wait_event(wait_list, &wait,
atomic_read(&cl->remaining) == -1);
}
}
EXPORT_SYMBOL(__closure_lock);
#ifdef CONFIG_BCACHE_CLOSURES_DEBUG
static LIST_HEAD(closure_list);
......
This diff is collapsed.
......@@ -8,6 +8,7 @@
#include "bcache.h"
#include "btree.h"
#include "debug.h"
#include "extents.h"
#include <linux/console.h>
#include <linux/debugfs.h>
......@@ -17,156 +18,88 @@
static struct dentry *debug;
const char *bch_ptr_status(struct cache_set *c, const struct bkey *k)
{
unsigned i;
for (i = 0; i < KEY_PTRS(k); i++)
if (ptr_available(c, k, i)) {
struct cache *ca = PTR_CACHE(c, k, i);
size_t bucket = PTR_BUCKET_NR(c, k, i);
size_t r = bucket_remainder(c, PTR_OFFSET(k, i));
if (KEY_SIZE(k) + r > c->sb.bucket_size)
return "bad, length too big";
if (bucket < ca->sb.first_bucket)
return "bad, short offset";
if (bucket >= ca->sb.nbuckets)
return "bad, offset past end of device";
if (ptr_stale(c, k, i))
return "stale";
}
if (!bkey_cmp(k, &ZERO_KEY))
return "bad, null key";
if (!KEY_PTRS(k))
return "bad, no pointers";
if (!KEY_SIZE(k))
return "zeroed key";
return "";
}
int bch_bkey_to_text(char *buf, size_t size, const struct bkey *k)
{
unsigned i = 0;
char *out = buf, *end = buf + size;
#define p(...) (out += scnprintf(out, end - out, __VA_ARGS__))
p("%llu:%llu len %llu -> [", KEY_INODE(k), KEY_OFFSET(k), KEY_SIZE(k));
if (KEY_PTRS(k))
while (1) {
p("%llu:%llu gen %llu",
PTR_DEV(k, i), PTR_OFFSET(k, i), PTR_GEN(k, i));
if (++i == KEY_PTRS(k))
break;
p(", ");
}
p("]");
if (KEY_DIRTY(k))
p(" dirty");
if (KEY_CSUM(k))
p(" cs%llu %llx", KEY_CSUM(k), k->ptr[1]);
#undef p
return out - buf;
}
#ifdef CONFIG_BCACHE_DEBUG
static void dump_bset(struct btree *b, struct bset *i)
{
struct bkey *k, *next;
unsigned j;
char buf[80];
for (k = i->start; k < end(i); k = next) {
next = bkey_next(k);
bch_bkey_to_text(buf, sizeof(buf), k);
printk(KERN_ERR "block %zu key %zi/%u: %s", index(i, b),
(uint64_t *) k - i->d, i->keys, buf);
for (j = 0; j < KEY_PTRS(k); j++) {
size_t n = PTR_BUCKET_NR(b->c, k, j);
printk(" bucket %zu", n);
if (n >= b->c->sb.first_bucket && n < b->c->sb.nbuckets)
printk(" prio %i",
PTR_BUCKET(b->c, k, j)->prio);
}
printk(" %s\n", bch_ptr_status(b->c, k));
if (next < end(i) &&
bkey_cmp(k, !b->level ? &START_KEY(next) : next) > 0)
printk(KERN_ERR "Key skipped backwards\n");
}
}
static void bch_dump_bucket(struct btree *b)
{
unsigned i;
console_lock();
for (i = 0; i <= b->nsets; i++)
dump_bset(b, b->sets[i].data);
console_unlock();
}
#define for_each_written_bset(b, start, i) \
for (i = (start); \
(void *) i < (void *) (start) + (KEY_SIZE(&b->key) << 9) &&\
i->seq == (start)->seq; \
i = (void *) i + set_blocks(i, block_bytes(b->c)) * \
block_bytes(b->c))
void bch_btree_verify(struct btree *b, struct bset *new)
void bch_btree_verify(struct btree *b)
{
struct btree *v = b->c->verify_data;
struct closure cl;
closure_init_stack(&cl);
struct bset *ondisk, *sorted, *inmemory;
struct bio *bio;
if (!b->c->verify)
if (!b->c->verify || !b->c->verify_ondisk)
return;
closure_wait_event(&b->io.wait, &cl,
atomic_read(&b->io.cl.remaining) == -1);
down(&b->io_mutex);
mutex_lock(&b->c->verify_lock);
ondisk = b->c->verify_ondisk;
sorted = b->c->verify_data->keys.set->data;
inmemory = b->keys.set->data;
bkey_copy(&v->key, &b->key);
v->written = 0;
v->level = b->level;
v->keys.ops = b->keys.ops;
bch_btree_node_read(v);
closure_wait_event(&v->io.wait, &cl,
atomic_read(&b->io.cl.remaining) == -1);
bio = bch_bbio_alloc(b->c);
bio->bi_bdev = PTR_CACHE(b->c, &b->key, 0)->bdev;
bio->bi_iter.bi_sector = PTR_OFFSET(&b->key, 0);
bio->bi_iter.bi_size = KEY_SIZE(&v->key) << 9;
bch_bio_map(bio, sorted);
if (new->keys != v->sets[0].data->keys ||
memcmp(new->start,
v->sets[0].data->start,
(void *) end(new) - (void *) new->start)) {
unsigned i, j;
submit_bio_wait(REQ_META|READ_SYNC, bio);
bch_bbio_free(bio, b->c);
memcpy(ondisk, sorted, KEY_SIZE(&v->key) << 9);
bch_btree_node_read_done(v);
sorted = v->keys.set->data;
if (inmemory->keys != sorted->keys ||
memcmp(inmemory->start,
sorted->start,
(void *) bset_bkey_last(inmemory) - (void *) inmemory->start)) {
struct bset *i;
unsigned j;
console_lock();
printk(KERN_ERR "*** original memory node:\n");
for (i = 0; i <= b->nsets; i++)
dump_bset(b, b->sets[i].data);
printk(KERN_ERR "*** in memory:\n");
bch_dump_bset(&b->keys, inmemory, 0);
printk(KERN_ERR "*** sorted memory node:\n");
dump_bset(b, new);
printk(KERN_ERR "*** read back in:\n");
bch_dump_bset(&v->keys, sorted, 0);
printk(KERN_ERR "*** on disk node:\n");
dump_bset(v, v->sets[0].data);
for_each_written_bset(b, ondisk, i) {
unsigned block = ((void *) i - (void *) ondisk) /
block_bytes(b->c);
for (j = 0; j < new->keys; j++)
if (new->d[j] != v->sets[0].data->d[j])
printk(KERN_ERR "*** on disk block %u:\n", block);
bch_dump_bset(&b->keys, i, block);
}
printk(KERN_ERR "*** block %zu not written\n",
((void *) i - (void *) ondisk) / block_bytes(b->c));
for (j = 0; j < inmemory->keys; j++)
if (inmemory->d[j] != sorted->d[j])
break;
printk(KERN_ERR "b->written %u\n", b->written);
console_unlock();
panic("verify failed at %u\n", j);
}
mutex_unlock(&b->c->verify_lock);
up(&b->io_mutex);
}
void bch_data_verify(struct cached_dev *dc, struct bio *bio)
......@@ -207,74 +140,6 @@ void bch_data_verify(struct cached_dev *dc, struct bio *bio)
bio_put(check);
}
int __bch_count_data(struct btree *b)
{
unsigned ret = 0;
struct btree_iter iter;
struct bkey *k;
if (!b->level)
for_each_key(b, k, &iter)
ret += KEY_SIZE(k);
return ret;
}
void __bch_check_keys(struct btree *b, const char *fmt, ...)
{
va_list args;
struct bkey *k, *p = NULL;
struct btree_iter iter;
const char *err;
for_each_key(b, k, &iter) {
if (!b->level) {
err = "Keys out of order";
if (p && bkey_cmp(&START_KEY(p), &START_KEY(k)) > 0)
goto bug;
if (bch_ptr_invalid(b, k))
continue;
err = "Overlapping keys";
if (p && bkey_cmp(p, &START_KEY(k)) > 0)
goto bug;
} else {
if (bch_ptr_bad(b, k))
continue;
err = "Duplicate keys";
if (p && !bkey_cmp(p, k))
goto bug;
}
p = k;
}
err = "Key larger than btree node key";
if (p && bkey_cmp(p, &b->key) > 0)
goto bug;
return;
bug:
bch_dump_bucket(b);
va_start(args, fmt);
vprintk(fmt, args);
va_end(args);
panic("bcache error: %s:\n", err);
}
void bch_btree_iter_next_check(struct btree_iter *iter)
{
struct bkey *k = iter->data->k, *next = bkey_next(k);
if (next < iter->data->end &&
bkey_cmp(k, iter->b->level ? next : &START_KEY(next)) > 0) {
bch_dump_bucket(iter->b);
panic("Key skipped backwards\n");
}
}
#endif
#ifdef CONFIG_DEBUG_FS
......@@ -321,7 +186,7 @@ static ssize_t bch_dump_read(struct file *file, char __user *buf,
if (!w)
break;
bch_bkey_to_text(kbuf, sizeof(kbuf), &w->key);
bch_extent_to_text(kbuf, sizeof(kbuf), &w->key);
i->bytes = snprintf(i->buf, PAGE_SIZE, "%s\n", kbuf);
bch_keybuf_del(&i->keys, w);
}
......
#ifndef _BCACHE_DEBUG_H
#define _BCACHE_DEBUG_H
/* Btree/bkey debug printing */
int bch_bkey_to_text(char *buf, size_t size, const struct bkey *k);
struct bio;
struct cached_dev;
struct cache_set;
#ifdef CONFIG_BCACHE_DEBUG
void bch_btree_verify(struct btree *, struct bset *);
void bch_btree_verify(struct btree *);
void bch_data_verify(struct cached_dev *, struct bio *);
int __bch_count_data(struct btree *);
void __bch_check_keys(struct btree *, const char *, ...);
void bch_btree_iter_next_check(struct btree_iter *);
#define EBUG_ON(cond) BUG_ON(cond)
#define expensive_debug_checks(c) ((c)->expensive_debug_checks)
#define key_merging_disabled(c) ((c)->key_merging_disabled)
#define bypass_torture_test(d) ((d)->bypass_torture_test)
#else /* DEBUG */
static inline void bch_btree_verify(struct btree *b, struct bset *i) {}
static inline void bch_btree_verify(struct btree *b) {}
static inline void bch_data_verify(struct cached_dev *dc, struct bio *bio) {}
static inline int __bch_count_data(struct btree *b) { return -1; }
static inline void __bch_check_keys(struct btree *b, const char *fmt, ...) {}
static inline void bch_btree_iter_next_check(struct btree_iter *iter) {}
#define EBUG_ON(cond) do { if (cond); } while (0)
#define expensive_debug_checks(c) 0
#define key_merging_disabled(c) 0
#define bypass_torture_test(d) 0
#endif
#define bch_count_data(b) \
(expensive_debug_checks((b)->c) ? __bch_count_data(b) : -1)
#define bch_check_keys(b, ...) \
do { \
if (expensive_debug_checks((b)->c)) \
__bch_check_keys(b, __VA_ARGS__); \
} while (0)
#ifdef CONFIG_DEBUG_FS
void bch_debug_init_cache_set(struct cache_set *);
#else
......
This diff is collapsed.
#ifndef _BCACHE_EXTENTS_H
#define _BCACHE_EXTENTS_H
extern const struct btree_keys_ops bch_btree_keys_ops;
extern const struct btree_keys_ops bch_extent_keys_ops;
struct bkey;
struct cache_set;
void bch_extent_to_text(char *, size_t, const struct bkey *);
bool __bch_btree_ptr_invalid(struct cache_set *, const struct bkey *);
#endif /* _BCACHE_EXTENTS_H */
This diff is collapsed.
......@@ -104,6 +104,7 @@ struct journal {
/* used when waiting because the journal was full */
struct closure_waitlist wait;
struct closure io;
int io_in_flight;
struct delayed_work work;
/* Number of blocks free in the bucket(s) we're currently writing to */
......
......@@ -211,7 +211,7 @@ void bch_moving_gc(struct cache_set *c)
for_each_cache(ca, c, i) {
unsigned sectors_to_move = 0;
unsigned reserve_sectors = ca->sb.bucket_size *
min(fifo_used(&ca->free), ca->free.size / 2);
fifo_used(&ca->free[RESERVE_MOVINGGC]);
ca->heap.used = 0;
......
This diff is collapsed.
......@@ -13,6 +13,10 @@ struct data_insert_op {
uint16_t write_prio;
short error;
union {
uint16_t flags;
struct {
unsigned bypass:1;
unsigned writeback:1;
unsigned flush_journal:1;
......@@ -22,8 +26,9 @@ struct data_insert_op {
unsigned replace_collision:1;
unsigned insert_data_done:1;
};
};
/* Anything past this point won't get zeroed in search_alloc() */
struct keylist insert_keys;
BKEY_PADDED(replace_key);
};
......
This diff is collapsed.
This diff is collapsed.
......@@ -2,6 +2,7 @@
#ifndef _BCACHE_UTIL_H
#define _BCACHE_UTIL_H
#include <linux/blkdev.h>
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/llist.h>
......@@ -17,11 +18,13 @@ struct closure;
#ifdef CONFIG_BCACHE_DEBUG
#define EBUG_ON(cond) BUG_ON(cond)
#define atomic_dec_bug(v) BUG_ON(atomic_dec_return(v) < 0)
#define atomic_inc_bug(v, i) BUG_ON(atomic_inc_return(v) <= i)
#else /* DEBUG */
#define EBUG_ON(cond) do { if (cond); } while (0)
#define atomic_dec_bug(v) atomic_dec(v)
#define atomic_inc_bug(v, i) atomic_inc(v)
......@@ -391,6 +394,11 @@ struct time_stats {
void bch_time_stats_update(struct time_stats *stats, uint64_t time);
static inline unsigned local_clock_us(void)
{
return local_clock() >> 10;
}
#define NSEC_PER_ns 1L
#define NSEC_PER_us NSEC_PER_USEC
#define NSEC_PER_ms NSEC_PER_MSEC
......
......@@ -6103,6 +6103,7 @@ static int run(struct mddev *mddev)
blk_queue_io_min(mddev->queue, chunk_size);
blk_queue_io_opt(mddev->queue, chunk_size *
(conf->raid_disks - conf->max_degraded));
mddev->queue->limits.raid_partial_stripes_expensive = 1;
/*
* We can only discard a whole stripe. It doesn't make sense to
* discard data disk but write parity disk
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment