Commit 53d8ab29 authored by Linus Torvalds

Merge branch 'for-3.14/drivers' of git://git.kernel.dk/linux-block

Pull block IO driver changes from Jens Axboe:

 - bcache update from Kent Overstreet.

 - two bcache fixes from Nicholas Swenson.

 - cciss pci init error fix from Andrew.

 - underflow fix in the parallel IDE pg_write code from Dan Carpenter.
   I'm sure the 1 (or 0) users of that are now happy.

 - two PCI related fixes for sx8 from Jingoo Han.

 - floppy init fix for first block read from Jiri Kosina.

 - pktcdvd error return miss fix from Julia Lawall.

 - removal of IRQF_DISABLED from the SEGA Dreamcast CD-ROM code from
   Michael Opdenacker.

 - comment typo fix for the loop driver from Olaf Hering.

 - potential oops fix for null_blk from Raghavendra K T.

 - two fixes from Sam Bradshaw (Micron) for the mtip32xx driver, fixing
   an OOM problem and a problem with handling security locked conditions.

* 'for-3.14/drivers' of git://git.kernel.dk/linux-block: (47 commits)
  mg_disk: Spelling s/finised/finished/
  null_blk: Null pointer deference problem in alloc_page_buffers
  mtip32xx: Correctly handle security locked condition
  mtip32xx: Make SGL container per-command to eliminate high order dma allocation
  drivers/block/loop.c: fix comment typo in loop_config_discard
  drivers/block/cciss.c:cciss_init_one(): use proper errnos
  drivers/block/paride/pg.c: underflow bug in pg_write()
  drivers/block/sx8.c: remove unnecessary pci_set_drvdata()
  drivers/block/sx8.c: use module_pci_driver()
  floppy: bail out in open() if drive is not responding to block0 read
  bcache: Fix auxiliary search trees for key size > cacheline size
  bcache: Don't return -EINTR when insert finished
  bcache: Improve bucket_prio() calculation
  bcache: Add bch_bkey_equal_header()
  bcache: update bch_bkey_try_merge
  bcache: Move insert_fixup() to btree_keys_ops
  bcache: Convert sorting to btree_keys
  bcache: Convert debug code to btree_keys
  bcache: Convert btree_iter to struct btree_keys
  bcache: Refactor bset_tree sysfs stats
  ...
parents f568849e 14424be4
@@ -592,6 +592,10 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
         ret = -1;
     }
 
+    t->raid_partial_stripes_expensive =
+        max(t->raid_partial_stripes_expensive,
+            b->raid_partial_stripes_expensive);
+
     /* Find lowest common alignment_offset */
     t->alignment_offset = lcm(t->alignment_offset, alignment)
         & (max(t->physical_block_size, t->io_min) - 1);
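The new raid_partial_stripes_expensive limit is stacked with max(), so the top-level queue reports partial-stripe writes as expensive as soon as any underlying device does (raid5 sets the flag later in this merge). A standalone sketch of that rule, with hypothetical device names and nothing taken from the kernel beyond the max() idiom shown above:

/* Illustration only: a boolean limit stacked with max() is "sticky". */
#include <stdio.h>

static unsigned char stack_flag(unsigned char top, unsigned char bottom)
{
    return top > bottom ? top : bottom;   /* max(t, b) */
}

int main(void)
{
    unsigned char dm = 0;      /* hypothetical stacked device, starts cheap */
    unsigned char ssd = 0;     /* component that never sets the flag */
    unsigned char raid5 = 1;   /* component that marks partial stripes expensive */

    dm = stack_flag(dm, ssd);
    dm = stack_flag(dm, raid5);
    printf("raid_partial_stripes_expensive = %u\n", dm);   /* prints 1 */
    return 0;
}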
@@ -5004,7 +5004,7 @@ static int cciss_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 
     i = alloc_cciss_hba(pdev);
     if (i < 0)
-        return -1;
+        return -ENOMEM;
 
     h = hba[i];
     h->pdev = pdev;
@@ -5205,7 +5205,7 @@ static int cciss_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
      */
     pci_set_drvdata(pdev, NULL);
     free_hba(h);
-    return -1;
+    return -ENODEV;
 }
 
 static void cciss_shutdown(struct pci_dev *pdev)
@@ -3691,9 +3691,12 @@ static int floppy_open(struct block_device *bdev, fmode_t mode)
     if (!(mode & FMODE_NDELAY)) {
         if (mode & (FMODE_READ|FMODE_WRITE)) {
             UDRS->last_checked = 0;
+            clear_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags);
             check_disk_change(bdev);
             if (test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags))
                 goto out;
+            if (test_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags))
+                goto out;
         }
         res = -EROFS;
         if ((mode & FMODE_WRITE) &&
@@ -3746,17 +3749,29 @@ static unsigned int floppy_check_events(struct gendisk *disk,
  * a disk in the drive, and whether that disk is writable.
  */
-static void floppy_rb0_complete(struct bio *bio, int err)
+struct rb0_cbdata {
+    int drive;
+    struct completion complete;
+};
+
+static void floppy_rb0_cb(struct bio *bio, int err)
 {
-    complete((struct completion *)bio->bi_private);
+    struct rb0_cbdata *cbdata = (struct rb0_cbdata *)bio->bi_private;
+    int drive = cbdata->drive;
+
+    if (err) {
+        pr_info("floppy: error %d while reading block 0", err);
+        set_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags);
+    }
+    complete(&cbdata->complete);
 }
 
-static int __floppy_read_block_0(struct block_device *bdev)
+static int __floppy_read_block_0(struct block_device *bdev, int drive)
 {
     struct bio bio;
     struct bio_vec bio_vec;
-    struct completion complete;
     struct page *page;
+    struct rb0_cbdata cbdata;
     size_t size;
 
     page = alloc_page(GFP_NOIO);
@@ -3769,6 +3784,8 @@ static int __floppy_read_block_0(struct block_device *bdev)
     if (!size)
         size = 1024;
 
+    cbdata.drive = drive;
+
     bio_init(&bio);
     bio.bi_io_vec = &bio_vec;
     bio_vec.bv_page = page;
@@ -3779,13 +3796,14 @@ static int __floppy_read_block_0(struct block_device *bdev)
     bio.bi_bdev = bdev;
     bio.bi_iter.bi_sector = 0;
     bio.bi_flags = (1 << BIO_QUIET);
-    init_completion(&complete);
-    bio.bi_private = &complete;
-    bio.bi_end_io = floppy_rb0_complete;
+    bio.bi_private = &cbdata;
+    bio.bi_end_io = floppy_rb0_cb;
 
     submit_bio(READ, &bio);
     process_fd_request();
-    wait_for_completion(&complete);
+
+    init_completion(&cbdata.complete);
+    wait_for_completion(&cbdata.complete);
 
     __free_page(page);
@@ -3827,7 +3845,7 @@ static int floppy_revalidate(struct gendisk *disk)
         UDRS->generation++;
         if (drive_no_geom(drive)) {
             /* auto-sensing */
-            res = __floppy_read_block_0(opened_bdev[drive]);
+            res = __floppy_read_block_0(opened_bdev[drive], drive);
         } else {
             if (cf)
                 poll_drive(false, FD_RAW_NEED_DISK);
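The floppy change replaces a bare completion pointer in bio->bi_private with a small per-request context struct, so the end_io callback can record which drive failed before waking the opener. A minimal userspace analogue of that callback-context pattern, with made-up names (rb0_ctx, submit_read_block0) standing in for the kernel API:

/* Sketch only: pack a context struct into a private pointer, have the
 * completion callback record status there, then signal completion. */
#include <stdio.h>
#include <stdbool.h>

struct rb0_ctx {
    int  drive;
    int  error;
    bool done;               /* stands in for struct completion */
};

static void rb0_done(void *private, int err)
{
    struct rb0_ctx *ctx = private;

    if (err) {
        printf("floppy: error %d while reading block 0 of drive %d\n",
               err, ctx->drive);
        ctx->error = err;    /* analogous to set_bit(FD_OPEN_SHOULD_FAIL_BIT, ...) */
    }
    ctx->done = true;        /* analogous to complete(&cbdata->complete) */
}

/* fake "submit" that completes immediately with the given status */
static void submit_read_block0(void (*end_io)(void *, int), void *private, int status)
{
    end_io(private, status);
}

int main(void)
{
    struct rb0_ctx ctx = { .drive = 0 };

    submit_read_block0(rb0_done, &ctx, -5 /* pretend -EIO */);
    while (!ctx.done)
        ;                    /* real code sleeps in wait_for_completion() */

    printf("open() %s\n", ctx.error ? "bails out" : "proceeds");
    return 0;
}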
@@ -799,7 +799,7 @@ static void loop_config_discard(struct loop_device *lo)
 
     /*
      * We use punch hole to reclaim the free space used by the
-     * image a.k.a. discard. However we do support discard if
+     * image a.k.a. discard. However we do not support discard if
      * encryption is enabled, because it may give an attacker
      * useful information.
      */
@@ -915,7 +915,7 @@ static int mg_probe(struct platform_device *plat_dev)
 
     /* disk reset */
     if (prv_data->dev_attr == MG_STORAGE_DEV) {
-        /* If POR seq. not yet finised, wait */
+        /* If POR seq. not yet finished, wait */
         err = mg_wait_rstout(host->rstout, MG_TMAX_RSTOUT);
         if (err)
             goto probe_err_3b;
@@ -69,7 +69,7 @@
  * Maximum number of scatter gather entries
  * a single command may have.
  */
-#define MTIP_MAX_SG		128
+#define MTIP_MAX_SG		504
 
 /*
  * Maximum number of slot groups (Command Issue & s_active registers)
@@ -92,7 +92,7 @@
 
 /* Driver name and version strings */
 #define MTIP_DRV_NAME		"mtip32xx"
-#define MTIP_DRV_VERSION	"1.2.6os3"
+#define MTIP_DRV_VERSION	"1.3.0"
 
 /* Maximum number of minor device numbers per device. */
 #define MTIP_MAX_MINORS		16
@@ -391,15 +391,13 @@ struct mtip_port {
      */
     dma_addr_t rxfis_dma;
     /*
-     * Pointer to the beginning of the command table memory as used
-     * by the driver.
+     * Pointer to the DMA region for RX Fis, Identify, RLE10, and SMART
      */
-    void *command_table;
+    void *block1;
     /*
-     * Pointer to the beginning of the command table memory as used
-     * by the DMA.
+     * DMA address of region for RX Fis, Identify, RLE10, and SMART
      */
-    dma_addr_t command_tbl_dma;
+    dma_addr_t block1_dma;
     /*
      * Pointer to the beginning of the identify data memory as used
      * by the driver.
@@ -616,6 +616,11 @@ static int __init null_init(void)
         irqmode = NULL_IRQ_NONE;
     }
 #endif
+    if (bs > PAGE_SIZE) {
+        pr_warn("null_blk: invalid block size\n");
+        pr_warn("null_blk: defaults block size to %lu\n", PAGE_SIZE);
+        bs = PAGE_SIZE;
+    }
 
     if (queue_mode == NULL_Q_MQ && use_per_node_hctx) {
         if (submit_queues < nr_online_nodes) {
@@ -581,7 +581,7 @@ static ssize_t pg_write(struct file *filp, const char __user *buf, size_t count,
 
     if (hdr.magic != PG_MAGIC)
         return -EINVAL;
-    if (hdr.dlen > PG_MAX_DATA)
+    if (hdr.dlen < 0 || hdr.dlen > PG_MAX_DATA)
         return -EINVAL;
     if ((count - hs) > PG_MAX_DATA)
         return -EINVAL;
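hdr.dlen is a signed value (the new check only makes sense for one), so the old upper-bound-only test let negative lengths through; once such a value is later used as an unsigned size, it becomes enormous. A self-contained illustration of the underflow (the PG_MAX_DATA value here is illustrative, not taken from the driver):

/* Illustration only: why both bounds must be checked for a signed length. */
#include <stdio.h>
#include <stddef.h>

#define PG_MAX_DATA 32768   /* illustrative limit */

int main(void)
{
    int dlen = -1;                         /* hostile header field */

    if (dlen > PG_MAX_DATA)                /* old check: false, so -1 slips through */
        return 1;

    size_t copy = (size_t)dlen;            /* later treated as an unsigned length */
    printf("would try to handle %zu bytes\n", copy);

    if (dlen < 0 || dlen > PG_MAX_DATA)    /* new check rejects it */
        printf("new check returns -EINVAL\n");
    return 0;
}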
@@ -706,7 +706,9 @@ static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command *
                  WRITE : READ, __GFP_WAIT);
 
     if (cgc->buflen) {
-        if (blk_rq_map_kern(q, rq, cgc->buffer, cgc->buflen, __GFP_WAIT))
+        ret = blk_rq_map_kern(q, rq, cgc->buffer, cgc->buflen,
+                              __GFP_WAIT);
+        if (ret)
             goto out;
     }
 
@@ -1744,20 +1744,6 @@ static void carm_remove_one (struct pci_dev *pdev)
     kfree(host);
     pci_release_regions(pdev);
     pci_disable_device(pdev);
-    pci_set_drvdata(pdev, NULL);
 }
 
-static int __init carm_init(void)
-{
-    return pci_register_driver(&carm_driver);
-}
-
-static void __exit carm_exit(void)
-{
-    pci_unregister_driver(&carm_driver);
-}
-
-module_init(carm_init);
-module_exit(carm_exit);
+module_pci_driver(carm_driver);
@@ -561,11 +561,11 @@ static int gdrom_set_interrupt_handlers(void)
     int err;
 
     err = request_irq(HW_EVENT_GDROM_CMD, gdrom_command_interrupt,
-        IRQF_DISABLED, "gdrom_command", &gd);
+        0, "gdrom_command", &gd);
     if (err)
         return err;
     err = request_irq(HW_EVENT_GDROM_DMA, gdrom_dma_interrupt,
-        IRQF_DISABLED, "gdrom_dma", &gd);
+        0, "gdrom_dma", &gd);
     if (err)
         free_irq(HW_EVENT_GDROM_CMD, &gd);
     return err;
 obj-$(CONFIG_BCACHE)	+= bcache.o
 
-bcache-y		:= alloc.o btree.o bset.o io.o journal.o writeback.o\
-	movinggc.o request.o super.o sysfs.o debug.o util.o trace.o stats.o closure.o
+bcache-y		:= alloc.o bset.o btree.o closure.o debug.o extents.o\
+	io.o journal.o movinggc.o request.o stats.o super.o sysfs.o trace.o\
+	util.o writeback.o
 
 CFLAGS_request.o	+= -Iblock
@@ -132,10 +132,16 @@ bool bch_bucket_add_unused(struct cache *ca, struct bucket *b)
 {
     BUG_ON(GC_MARK(b) || GC_SECTORS_USED(b));
 
-    if (fifo_used(&ca->free) > ca->watermark[WATERMARK_MOVINGGC] &&
-        CACHE_REPLACEMENT(&ca->sb) == CACHE_REPLACEMENT_FIFO)
-        return false;
+    if (CACHE_REPLACEMENT(&ca->sb) == CACHE_REPLACEMENT_FIFO) {
+        unsigned i;
+
+        for (i = 0; i < RESERVE_NONE; i++)
+            if (!fifo_full(&ca->free[i]))
+                goto add;
 
+        return false;
+    }
+add:
     b->prio = 0;
 
     if (can_inc_bucket_gen(b) &&
@@ -162,8 +168,21 @@ static void invalidate_one_bucket(struct cache *ca, struct bucket *b)
     fifo_push(&ca->free_inc, b - ca->buckets);
 }
 
-#define bucket_prio(b)				\
-    (((unsigned) (b->prio - ca->set->min_prio)) * GC_SECTORS_USED(b))
+/*
+ * Determines what order we're going to reuse buckets, smallest bucket_prio()
+ * first: we also take into account the number of sectors of live data in that
+ * bucket, and in order for that multiply to make sense we have to scale bucket
+ *
+ * Thus, we scale the bucket priorities so that the bucket with the smallest
+ * prio is worth 1/8th of what INITIAL_PRIO is worth.
+ */
+#define bucket_prio(b)							\
+({									\
+    unsigned min_prio = (INITIAL_PRIO - ca->set->min_prio) / 8;	\
+									\
+    (b->prio - ca->set->min_prio + min_prio) * GC_SECTORS_USED(b);	\
+})
 
 #define bucket_max_cmp(l, r)	(bucket_prio(l) < bucket_prio(r))
 #define bucket_min_cmp(l, r)	(bucket_prio(l) > bucket_prio(r))
@@ -304,6 +323,21 @@ do {								\
     __set_current_state(TASK_RUNNING);			\
 } while (0)
 
+static int bch_allocator_push(struct cache *ca, long bucket)
+{
+    unsigned i;
+
+    /* Prios/gens are actually the most important reserve */
+    if (fifo_push(&ca->free[RESERVE_PRIO], bucket))
+        return true;
+
+    for (i = 0; i < RESERVE_NR; i++)
+        if (fifo_push(&ca->free[i], bucket))
+            return true;
+
+    return false;
+}
+
 static int bch_allocator_thread(void *arg)
 {
     struct cache *ca = arg;
@@ -336,9 +370,7 @@ static int bch_allocator_thread(void *arg)
             mutex_lock(&ca->set->bucket_lock);
         }
 
-        allocator_wait(ca, !fifo_full(&ca->free));
-
-        fifo_push(&ca->free, bucket);
+        allocator_wait(ca, bch_allocator_push(ca, bucket));
         wake_up(&ca->set->bucket_wait);
     }
@@ -365,34 +397,29 @@ static int bch_allocator_thread(void *arg)
     }
 }
 
-long bch_bucket_alloc(struct cache *ca, unsigned watermark, bool wait)
+long bch_bucket_alloc(struct cache *ca, unsigned reserve, bool wait)
 {
     DEFINE_WAIT(w);
     struct bucket *b;
     long r;
 
     /* fastpath */
-    if (fifo_used(&ca->free) > ca->watermark[watermark]) {
-        fifo_pop(&ca->free, r);
+    if (fifo_pop(&ca->free[RESERVE_NONE], r) ||
+        fifo_pop(&ca->free[reserve], r))
         goto out;
-    }
 
     if (!wait)
         return -1;
 
-    while (1) {
-        if (fifo_used(&ca->free) > ca->watermark[watermark]) {
-            fifo_pop(&ca->free, r);
-            break;
-        }
-
+    do {
         prepare_to_wait(&ca->set->bucket_wait, &w,
                 TASK_UNINTERRUPTIBLE);
         mutex_unlock(&ca->set->bucket_lock);
         schedule();
         mutex_lock(&ca->set->bucket_lock);
-    }
+    } while (!fifo_pop(&ca->free[RESERVE_NONE], r) &&
+         !fifo_pop(&ca->free[reserve], r));
 
     finish_wait(&ca->set->bucket_wait, &w);
 out:
@@ -401,12 +428,14 @@ long bch_bucket_alloc(struct cache *ca, unsigned watermark, bool wait)
     if (expensive_debug_checks(ca->set)) {
         size_t iter;
         long i;
+        unsigned j;
 
         for (iter = 0; iter < prio_buckets(ca) * 2; iter++)
             BUG_ON(ca->prio_buckets[iter] == (uint64_t) r);
 
-        fifo_for_each(i, &ca->free, iter)
-            BUG_ON(i == r);
+        for (j = 0; j < RESERVE_NR; j++)
+            fifo_for_each(i, &ca->free[j], iter)
+                BUG_ON(i == r);
         fifo_for_each(i, &ca->free_inc, iter)
             BUG_ON(i == r);
         fifo_for_each(i, &ca->unused, iter)
@@ -419,7 +448,7 @@ long bch_bucket_alloc(struct cache *ca, unsigned watermark, bool wait)
 
     SET_GC_SECTORS_USED(b, ca->sb.bucket_size);
 
-    if (watermark <= WATERMARK_METADATA) {
+    if (reserve <= RESERVE_PRIO) {
         SET_GC_MARK(b, GC_MARK_METADATA);
         SET_GC_MOVE(b, 0);
         b->prio = BTREE_PRIO;
@@ -445,7 +474,7 @@ void bch_bucket_free(struct cache_set *c, struct bkey *k)
     }
 }
 
-int __bch_bucket_alloc_set(struct cache_set *c, unsigned watermark,
+int __bch_bucket_alloc_set(struct cache_set *c, unsigned reserve,
                struct bkey *k, int n, bool wait)
 {
     int i;
@@ -459,7 +488,7 @@ int __bch_bucket_alloc_set(struct cache_set *c, unsigned watermark,
     for (i = 0; i < n; i++) {
         struct cache *ca = c->cache_by_alloc[i];
-        long b = bch_bucket_alloc(ca, watermark, wait);
+        long b = bch_bucket_alloc(ca, reserve, wait);
 
         if (b == -1)
             goto err;
@@ -478,12 +507,12 @@ int __bch_bucket_alloc_set(struct cache_set *c, unsigned watermark,
     return -1;
 }
 
-int bch_bucket_alloc_set(struct cache_set *c, unsigned watermark,
+int bch_bucket_alloc_set(struct cache_set *c, unsigned reserve,
              struct bkey *k, int n, bool wait)
 {
     int ret;
     mutex_lock(&c->bucket_lock);
-    ret = __bch_bucket_alloc_set(c, watermark, k, n, wait);
+    ret = __bch_bucket_alloc_set(c, reserve, k, n, wait);
     mutex_unlock(&c->bucket_lock);
     return ret;
 }
@@ -573,8 +602,8 @@ bool bch_alloc_sectors(struct cache_set *c, struct bkey *k, unsigned sectors,
 
     while (!(b = pick_data_bucket(c, k, write_point, &alloc.key))) {
         unsigned watermark = write_prio
-            ? WATERMARK_MOVINGGC
-            : WATERMARK_NONE;
+            ? RESERVE_MOVINGGC
+            : RESERVE_NONE;
 
         spin_unlock(&c->data_bucket_lock);
@@ -689,7 +718,7 @@ int bch_cache_allocator_init(struct cache *ca)
      * Then 8 for btree allocations
      * Then half for the moving garbage collector
      */
-
+#if 0
     ca->watermark[WATERMARK_PRIO] = 0;
 
     ca->watermark[WATERMARK_METADATA] = prio_buckets(ca);
@@ -699,6 +728,6 @@ int bch_cache_allocator_init(struct cache *ca)
 
     ca->watermark[WATERMARK_NONE] = ca->free.size / 2 +
         ca->watermark[WATERMARK_MOVINGGC];
-
+#endif
     return 0;
 }
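The bucket_prio() comment above is easier to see with numbers: the added min_prio offset is one eighth of the distance from the cache set's minimum prio up to INITIAL_PRIO, so even a bucket sitting exactly at the minimum prio is still ranked by how much live data it holds instead of collapsing to zero as under the old formula. A standalone sketch with illustrative values (not the kernel macro itself):

/* Worked example of the bucket_prio() scaling, illustrative numbers only. */
#include <stdio.h>

#define INITIAL_PRIO 32768U

static unsigned old_prio(unsigned prio, unsigned set_min, unsigned sectors)
{
    return (prio - set_min) * sectors;
}

static unsigned new_prio(unsigned prio, unsigned set_min, unsigned sectors)
{
    unsigned min_prio = (INITIAL_PRIO - set_min) / 8;

    return (prio - set_min + min_prio) * sectors;
}

int main(void)
{
    unsigned set_min = 0, sectors = 128;

    /* bucket sitting exactly at the cache set's minimum prio */
    printf("old: %u  new: %u\n",
           old_prio(set_min, set_min, sectors),    /* 0: live data ignored */
           new_prio(set_min, set_min, sectors));   /* 4096 * 128 = 524288 */
    return 0;
}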
@@ -187,6 +187,7 @@
 #include <linux/types.h>
 #include <linux/workqueue.h>
 
+#include "bset.h"
 #include "util.h"
 #include "closure.h"
 
@@ -309,7 +310,8 @@ struct cached_dev {
     struct cache_sb		sb;
     struct bio			sb_bio;
     struct bio_vec		sb_bv[1];
-    struct closure_with_waitlist sb_write;
+    struct closure		sb_write;
+    struct semaphore		sb_write_mutex;
 
     /* Refcount on the cache set. Always nonzero when we're caching. */
     atomic_t			count;
@@ -382,12 +384,12 @@ struct cached_dev {
     unsigned			writeback_rate_p_term_inverse;
 };
 
-enum alloc_watermarks {
-    WATERMARK_PRIO,
-    WATERMARK_METADATA,
-    WATERMARK_MOVINGGC,
-    WATERMARK_NONE,
-    WATERMARK_MAX
+enum alloc_reserve {
+    RESERVE_BTREE,
+    RESERVE_PRIO,
+    RESERVE_MOVINGGC,
+    RESERVE_NONE,
+    RESERVE_NR,
 };
 
 struct cache {
@@ -399,8 +401,6 @@ struct cache {
     struct kobject		kobj;
     struct block_device		*bdev;
 
-    unsigned			watermark[WATERMARK_MAX];
-
     struct task_struct		*alloc_thread;
 
     struct closure		prio;
@@ -429,7 +429,7 @@ struct cache {
      * because all the data they contained was overwritten), so we only
      * need to discard them before they can be moved to the free list.
      */
-    DECLARE_FIFO(long, free);
+    DECLARE_FIFO(long, free)[RESERVE_NR];
     DECLARE_FIFO(long, free_inc);
     DECLARE_FIFO(long, unused);
@@ -514,7 +514,8 @@ struct cache_set {
     uint64_t			cached_dev_sectors;
     struct closure		caching;
 
-    struct closure_with_waitlist sb_write;
+    struct closure		sb_write;
+    struct semaphore		sb_write_mutex;
 
     mempool_t			*search;
     mempool_t			*bio_meta;
@@ -629,13 +630,15 @@ struct cache_set {
 #ifdef CONFIG_BCACHE_DEBUG
     struct btree		*verify_data;
+    struct bset			*verify_ondisk;
     struct mutex		verify_lock;
 #endif
 
     unsigned			nr_uuids;
     struct uuid_entry		*uuids;
     BKEY_PADDED(uuid_bucket);
-    struct closure_with_waitlist uuid_write;
+    struct closure		uuid_write;
+    struct semaphore		uuid_write_mutex;
 
     /*
      * A btree node on disk could have too many bsets for an iterator to fit
@@ -643,13 +646,7 @@ struct cache_set {
      */
     mempool_t			*fill_iter;
 
-    /*
-     * btree_sort() is a merge sort and requires temporary space - single
-     * element mempool
-     */
-    struct mutex		sort_lock;
-    struct bset			*sort;
-    unsigned			sort_crit_factor;
+    struct bset_sort_state	sort;
 
     /* List of buckets we're currently writing data to */
     struct list_head		data_buckets;
@@ -665,7 +662,6 @@ struct cache_set {
     unsigned			congested_read_threshold_us;
     unsigned			congested_write_threshold_us;
 
-    struct time_stats		sort_time;
     struct time_stats		btree_gc_time;
     struct time_stats		btree_split_time;
     struct time_stats		btree_read_time;
@@ -683,9 +679,9 @@ struct cache_set {
     unsigned			error_decay;
 
     unsigned short		journal_delay_ms;
+    bool			expensive_debug_checks;
     unsigned			verify:1;
     unsigned			key_merging_disabled:1;
-    unsigned			expensive_debug_checks:1;
     unsigned			gc_always_rewrite:1;
     unsigned			shrinker_disabled:1;
     unsigned			copy_gc_enabled:1;
@@ -707,13 +703,8 @@ struct bbio {
     struct bio			bio;
 };
 
-static inline unsigned local_clock_us(void)
-{
-    return local_clock() >> 10;
-}
-
 #define BTREE_PRIO		USHRT_MAX
-#define INITIAL_PRIO		32768
+#define INITIAL_PRIO		32768U
 
 #define btree_bytes(c)		((c)->btree_pages * PAGE_SIZE)
 #define btree_blocks(b)							\
@@ -726,21 +717,6 @@ static inline unsigned local_clock_us(void)
 #define bucket_bytes(c)		((c)->sb.bucket_size << 9)
 #define block_bytes(c)		((c)->sb.block_size << 9)
 
-#define __set_bytes(i, k)	(sizeof(*(i)) + (k) * sizeof(uint64_t))
-#define set_bytes(i)		__set_bytes(i, i->keys)
-
-#define __set_blocks(i, k, c)	DIV_ROUND_UP(__set_bytes(i, k), block_bytes(c))
-#define set_blocks(i, c)	__set_blocks(i, (i)->keys, c)
-
-#define node(i, j)		((struct bkey *) ((i)->d + (j)))
-#define end(i)			node(i, (i)->keys)
-
-#define index(i, b)							\
-    ((size_t) (((void *) i - (void *) (b)->sets[0].data) /		\
-           block_bytes(b->c)))
-
-#define btree_data_space(b)	(PAGE_SIZE << (b)->page_order)
-
 #define prios_per_bucket(c)				\
     ((bucket_bytes(c) - sizeof(struct prio_set)) /	\
      sizeof(struct bucket_disk))
@@ -783,20 +759,34 @@ static inline struct bucket *PTR_BUCKET(struct cache_set *c,
     return PTR_CACHE(c, k, ptr)->buckets + PTR_BUCKET_NR(c, k, ptr);
 }
 
-/* Btree key macros */
+static inline uint8_t gen_after(uint8_t a, uint8_t b)
+{
+    uint8_t r = a - b;
+    return r > 128U ? 0 : r;
+}
 
-static inline void bkey_init(struct bkey *k)
+static inline uint8_t ptr_stale(struct cache_set *c, const struct bkey *k,
+                unsigned i)
 {
-    *k = ZERO_KEY;
+    return gen_after(PTR_BUCKET(c, k, i)->gen, PTR_GEN(k, i));
 }
 
+static inline bool ptr_available(struct cache_set *c, const struct bkey *k,
+                 unsigned i)
+{
+    return (PTR_DEV(k, i) < MAX_CACHES_PER_SET) && PTR_CACHE(c, k, i);
+}
+
+/* Btree key macros */
+
 /*
  * This is used for various on disk data structures - cache_sb, prio_set, bset,
  * jset: The checksum is _always_ the first 8 bytes of these structs
  */
 #define csum_set(i)							\
     bch_crc64(((void *) (i)) + sizeof(uint64_t),			\
-          ((void *) end(i)) - (((void *) (i)) + sizeof(uint64_t)))
+          ((void *) bset_bkey_last(i)) -				\
+          (((void *) (i)) + sizeof(uint64_t)))
 
 /* Error handling macros */
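The gen_after()/ptr_stale() helpers added to bcache.h compare 8-bit generation counters modulo 256: a pointer is stale by however far its generation lags the bucket's, and differences that wrap past 128 are treated as "not after" and clamped to zero. A standalone sketch of just that arithmetic:

/* Sketch of the wraparound-safe generation comparison shown above. */
#include <stdio.h>
#include <stdint.h>

static uint8_t gen_after(uint8_t a, uint8_t b)
{
    uint8_t r = a - b;          /* wraps modulo 256 */

    return r > 128U ? 0 : r;
}

int main(void)
{
    printf("%u\n", (unsigned)gen_after(10, 7));    /* 3: pointer is 3 generations stale */
    printf("%u\n", (unsigned)gen_after(2, 250));   /* 8: bucket gen wrapped past 255 */
    printf("%u\n", (unsigned)gen_after(7, 10));    /* 0: pointer is not stale */
    return 0;
}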
@@ -130,20 +130,12 @@ struct btree {
     unsigned long		flags;
     uint16_t			written;	/* would be nice to kill */
     uint8_t			level;
-    uint8_t			nsets;
-    uint8_t			page_order;
-
-    /*
-     * Set of sorted keys - the real btree node - plus a binary search tree
-     *
-     * sets[0] is special; set[0]->tree, set[0]->prev and set[0]->data point
-     * to the memory we have allocated for this btree node. Additionally,
-     * set[0]->data points to the entire btree node as it exists on disk.
-     */
-    struct bset_tree		sets[MAX_BSETS];
+
+    struct btree_keys		keys;
 
     /* For outstanding btree writes, used as a lock - protects write_idx */
-    struct closure_with_waitlist io;
+    struct closure		io;
+    struct semaphore		io_mutex;
 
     struct list_head		list;
     struct delayed_work		work;
@@ -179,24 +171,19 @@ static inline struct btree_write *btree_prev_write(struct btree *b)
     return b->writes + (btree_node_write_idx(b) ^ 1);
 }
 
-static inline unsigned bset_offset(struct btree *b, struct bset *i)
+static inline struct bset *btree_bset_first(struct btree *b)
 {
-    return (((size_t) i) - ((size_t) b->sets->data)) >> 9;
+    return b->keys.set->data;
 }
 
-static inline struct bset *write_block(struct btree *b)
+static inline struct bset *btree_bset_last(struct btree *b)
 {
-    return ((void *) b->sets[0].data) + b->written * block_bytes(b->c);
+    return bset_tree_last(&b->keys)->data;
 }
 
-static inline bool bset_written(struct btree *b, struct bset_tree *t)
+static inline unsigned bset_block_offset(struct btree *b, struct bset *i)
 {
-    return t->data < write_block(b);
-}
-
-static inline bool bkey_written(struct btree *b, struct bkey *k)
-{
-    return k < write_block(b)->start;
+    return bset_sector_offset(&b->keys, i) >> b->c->block_bits;
 }
 
 static inline void set_gc_sectors(struct cache_set *c)
@@ -204,21 +191,6 @@ static inline void set_gc_sectors(struct cache_set *c)
     atomic_set(&c->sectors_to_gc, c->sb.bucket_size * c->nbuckets / 16);
 }
 
-static inline struct bkey *bch_btree_iter_init(struct btree *b,
-                           struct btree_iter *iter,
-                           struct bkey *search)
-{
-    return __bch_btree_iter_init(b, iter, search, b->sets);
-}
-
-static inline bool bch_ptr_invalid(struct btree *b, const struct bkey *k)
-{
-    if (b->level)
-        return bch_btree_ptr_invalid(b->c, k);
-    else
-        return bch_extent_ptr_invalid(b->c, k);
-}
-
 void bkey_put(struct cache_set *c, struct bkey *k);
 
 /* Looping macros */
@@ -229,17 +201,12 @@ void bkey_put(struct cache_set *c, struct bkey *k);
          iter++)							\
         hlist_for_each_entry_rcu((b), (c)->bucket_hash + iter, hash)
 
-#define for_each_key_filter(b, k, iter, filter)				\
-    for (bch_btree_iter_init((b), (iter), NULL);			\
-         ((k) = bch_btree_iter_next_filter((iter), b, filter));)
-
-#define for_each_key(b, k, iter)					\
-    for (bch_btree_iter_init((b), (iter), NULL);			\
-         ((k) = bch_btree_iter_next(iter));)
-
 /* Recursing down the btree */
 
 struct btree_op {
+    /* for waiting on btree reserve in btree_split() */
+    wait_queue_t wait;
+
     /* Btree level at which we start taking write locks */
     short lock;
@@ -249,6 +216,7 @@ struct btree_op {
 static inline void bch_btree_op_init(struct btree_op *op, int write_lock_level)
 {
     memset(op, 0, sizeof(struct btree_op));
+    init_wait(&op->wait);
     op->lock = write_lock_level;
 }
@@ -267,7 +235,7 @@ static inline void rw_unlock(bool w, struct btree *b)
     (w ? up_write : up_read)(&b->lock);
 }
 
-void bch_btree_node_read(struct btree *);
+void bch_btree_node_read_done(struct btree *);
 void bch_btree_node_write(struct btree *, struct closure *);
 
 void bch_btree_set_root(struct btree *);
@@ -11,19 +11,6 @@
 
 #include "closure.h"
 
-#define CL_FIELD(type, field)					\
-    case TYPE_ ## type:					\
-    return &container_of(cl, struct type, cl)->field
-
-static struct closure_waitlist *closure_waitlist(struct closure *cl)
-{
-    switch (cl->type) {
-        CL_FIELD(closure_with_waitlist, wait);
-    default:
-        return NULL;
-    }
-}
-
 static inline void closure_put_after_sub(struct closure *cl, int flags)
 {
     int r = flags & CLOSURE_REMAINING_MASK;
@@ -42,17 +29,10 @@ static inline void closure_put_after_sub(struct closure *cl, int flags)
             closure_queue(cl);
         } else {
             struct closure *parent = cl->parent;
-            struct closure_waitlist *wait = closure_waitlist(cl);
             closure_fn *destructor = cl->fn;
 
             closure_debug_destroy(cl);
 
-            smp_mb();
-            atomic_set(&cl->remaining, -1);
-
-            if (wait)
-                closure_wake_up(wait);
-
             if (destructor)
                 destructor(cl);
@@ -69,19 +49,18 @@ void closure_sub(struct closure *cl, int v)
 }
 EXPORT_SYMBOL(closure_sub);
 
+/**
+ * closure_put - decrement a closure's refcount
+ */
 void closure_put(struct closure *cl)
 {
     closure_put_after_sub(cl, atomic_dec_return(&cl->remaining));
 }
 EXPORT_SYMBOL(closure_put);
 
-static void set_waiting(struct closure *cl, unsigned long f)
-{
-#ifdef CONFIG_BCACHE_CLOSURES_DEBUG
-    cl->waiting_on = f;
-#endif
-}
-
+/**
+ * closure_wake_up - wake up all closures on a wait list, without memory barrier
+ */
 void __closure_wake_up(struct closure_waitlist *wait_list)
 {
     struct llist_node *list;
@@ -106,27 +85,34 @@ void __closure_wake_up(struct closure_waitlist *wait_list)
         cl = container_of(reverse, struct closure, list);
         reverse = llist_next(reverse);
 
-        set_waiting(cl, 0);
+        closure_set_waiting(cl, 0);
         closure_sub(cl, CLOSURE_WAITING + 1);
     }
 }
 EXPORT_SYMBOL(__closure_wake_up);
 
-bool closure_wait(struct closure_waitlist *list, struct closure *cl)
+/**
+ * closure_wait - add a closure to a waitlist
+ *
+ * @waitlist will own a ref on @cl, which will be released when
+ * closure_wake_up() is called on @waitlist.
+ *
+ */
+bool closure_wait(struct closure_waitlist *waitlist, struct closure *cl)
 {
     if (atomic_read(&cl->remaining) & CLOSURE_WAITING)
         return false;
 
-    set_waiting(cl, _RET_IP_);
+    closure_set_waiting(cl, _RET_IP_);
     atomic_add(CLOSURE_WAITING + 1, &cl->remaining);
-    llist_add(&cl->list, &list->list);
+    llist_add(&cl->list, &waitlist->list);
 
     return true;
 }
 EXPORT_SYMBOL(closure_wait);
 
 /**
- * closure_sync() - sleep until a closure a closure has nothing left to wait on
+ * closure_sync - sleep until a closure a closure has nothing left to wait on
  *
  * Sleeps until the refcount hits 1 - the thread that's running the closure owns
  * the last refcount.
@@ -148,46 +134,6 @@ void closure_sync(struct closure *cl)
 }
 EXPORT_SYMBOL(closure_sync);
 
-/**
- * closure_trylock() - try to acquire the closure, without waiting
- * @cl: closure to lock
- *
- * Returns true if the closure was succesfully locked.
- */
-bool closure_trylock(struct closure *cl, struct closure *parent)
-{
-    if (atomic_cmpxchg(&cl->remaining, -1,
-               CLOSURE_REMAINING_INITIALIZER) != -1)
-        return false;
-
-    smp_mb();
-
-    cl->parent = parent;
-    if (parent)
-        closure_get(parent);
-
-    closure_set_ret_ip(cl);
-    closure_debug_create(cl);
-
-    return true;
-}
-EXPORT_SYMBOL(closure_trylock);
-
-void __closure_lock(struct closure *cl, struct closure *parent,
-            struct closure_waitlist *wait_list)
-{
-    struct closure wait;
-    closure_init_stack(&wait);
-
-    while (1) {
-        if (closure_trylock(cl, parent))
-            return;
-
-        closure_wait_event(wait_list, &wait,
-                   atomic_read(&cl->remaining) == -1);
-    }
-}
-EXPORT_SYMBOL(__closure_lock);
-
 #ifdef CONFIG_BCACHE_CLOSURES_DEBUG
 
 static LIST_HEAD(closure_list);
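The closure_wait() kerneldoc added above states the ownership rule: parking a closure on a waitlist takes a reference (CLOSURE_WAITING + 1), and closure_wake_up() gives it back, so the closure cannot go away while it is still reachable from the list. A simplified userspace analogue of just that refcounting rule (plain ints instead of atomics, made-up fake_* names, not the kernel implementation):

/* Analogue only: a wait list owns a reference on whatever it holds. */
#include <stdio.h>

struct fake_closure {
    int remaining;      /* stands in for atomic_t remaining */
    int waiting;        /* stands in for the CLOSURE_WAITING flag */
};

static int fake_closure_wait(struct fake_closure *cl)
{
    if (cl->waiting)
        return 0;               /* already on a list: refuse, like the real code */
    cl->waiting = 1;
    cl->remaining++;            /* the list now owns this reference */
    return 1;
}

static void fake_closure_wake_up(struct fake_closure *cl)
{
    cl->waiting = 0;
    cl->remaining--;            /* drop the reference the list held */
}

int main(void)
{
    struct fake_closure cl = { .remaining = 1 };

    fake_closure_wait(&cl);
    printf("on list, refs = %d\n", cl.remaining);   /* 2 */
    fake_closure_wake_up(&cl);
    printf("woken,   refs = %d\n", cl.remaining);   /* 1 */
    return 0;
}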
@@ -8,6 +8,7 @@
 #include "bcache.h"
 #include "btree.h"
 #include "debug.h"
+#include "extents.h"
 
 #include <linux/console.h>
 #include <linux/debugfs.h>
@@ -17,156 +18,88 @@
 static struct dentry *debug;
 
-const char *bch_ptr_status(struct cache_set *c, const struct bkey *k)
-{
-    unsigned i;
-
-    for (i = 0; i < KEY_PTRS(k); i++)
-        if (ptr_available(c, k, i)) {
-            struct cache *ca = PTR_CACHE(c, k, i);
-            size_t bucket = PTR_BUCKET_NR(c, k, i);
-            size_t r = bucket_remainder(c, PTR_OFFSET(k, i));
-
-            if (KEY_SIZE(k) + r > c->sb.bucket_size)
-                return "bad, length too big";
-            if (bucket < ca->sb.first_bucket)
-                return "bad, short offset";
-            if (bucket >= ca->sb.nbuckets)
-                return "bad, offset past end of device";
-            if (ptr_stale(c, k, i))
-                return "stale";
-        }
-
-    if (!bkey_cmp(k, &ZERO_KEY))
-        return "bad, null key";
-    if (!KEY_PTRS(k))
-        return "bad, no pointers";
-    if (!KEY_SIZE(k))
-        return "zeroed key";
-    return "";
-}
-
-int bch_bkey_to_text(char *buf, size_t size, const struct bkey *k)
-{
-    unsigned i = 0;
-    char *out = buf, *end = buf + size;
-
-#define p(...)	(out += scnprintf(out, end - out, __VA_ARGS__))
-
-    p("%llu:%llu len %llu -> [", KEY_INODE(k), KEY_OFFSET(k), KEY_SIZE(k));
-
-    if (KEY_PTRS(k))
-        while (1) {
-            p("%llu:%llu gen %llu",
-              PTR_DEV(k, i), PTR_OFFSET(k, i), PTR_GEN(k, i));
-
-            if (++i == KEY_PTRS(k))
-                break;
-
-            p(", ");
-        }
-
-    p("]");
-
-    if (KEY_DIRTY(k))
-        p(" dirty");
-    if (KEY_CSUM(k))
-        p(" cs%llu %llx", KEY_CSUM(k), k->ptr[1]);
-#undef p
-    return out - buf;
-}
-
 #ifdef CONFIG_BCACHE_DEBUG
 
-static void dump_bset(struct btree *b, struct bset *i)
-{
-    struct bkey *k, *next;
-    unsigned j;
-    char buf[80];
-
-    for (k = i->start; k < end(i); k = next) {
-        next = bkey_next(k);
-
-        bch_bkey_to_text(buf, sizeof(buf), k);
-        printk(KERN_ERR "block %zu key %zi/%u: %s", index(i, b),
-               (uint64_t *) k - i->d, i->keys, buf);
-
-        for (j = 0; j < KEY_PTRS(k); j++) {
-            size_t n = PTR_BUCKET_NR(b->c, k, j);
-            printk(" bucket %zu", n);
-
-            if (n >= b->c->sb.first_bucket && n < b->c->sb.nbuckets)
-                printk(" prio %i",
-                       PTR_BUCKET(b->c, k, j)->prio);
-        }
-
-        printk(" %s\n", bch_ptr_status(b->c, k));
-
-        if (next < end(i) &&
-            bkey_cmp(k, !b->level ? &START_KEY(next) : next) > 0)
-            printk(KERN_ERR "Key skipped backwards\n");
-    }
-}
-
-static void bch_dump_bucket(struct btree *b)
-{
-    unsigned i;
-
-    console_lock();
-    for (i = 0; i <= b->nsets; i++)
-        dump_bset(b, b->sets[i].data);
-    console_unlock();
-}
+#define for_each_written_bset(b, start, i)				\
+    for (i = (start);							\
+         (void *) i < (void *) (start) + (KEY_SIZE(&b->key) << 9) &&	\
+         i->seq == (start)->seq;					\
+         i = (void *) i + set_blocks(i, block_bytes(b->c)) *		\
+         block_bytes(b->c))
 
-void bch_btree_verify(struct btree *b, struct bset *new)
+void bch_btree_verify(struct btree *b)
 {
     struct btree *v = b->c->verify_data;
-    struct closure cl;
-    closure_init_stack(&cl);
+    struct bset *ondisk, *sorted, *inmemory;
+    struct bio *bio;
 
-    if (!b->c->verify)
+    if (!b->c->verify || !b->c->verify_ondisk)
         return;
 
-    closure_wait_event(&b->io.wait, &cl,
-               atomic_read(&b->io.cl.remaining) == -1);
+    down(&b->io_mutex);
     mutex_lock(&b->c->verify_lock);
 
+    ondisk = b->c->verify_ondisk;
+    sorted = b->c->verify_data->keys.set->data;
+    inmemory = b->keys.set->data;
+
     bkey_copy(&v->key, &b->key);
     v->written = 0;
     v->level = b->level;
+    v->keys.ops = b->keys.ops;
+
+    bio = bch_bbio_alloc(b->c);
+    bio->bi_bdev = PTR_CACHE(b->c, &b->key, 0)->bdev;
+    bio->bi_iter.bi_sector = PTR_OFFSET(&b->key, 0);
+    bio->bi_iter.bi_size = KEY_SIZE(&v->key) << 9;
+    bch_bio_map(bio, sorted);
 
-    bch_btree_node_read(v);
-    closure_wait_event(&v->io.wait, &cl,
-               atomic_read(&b->io.cl.remaining) == -1);
+    submit_bio_wait(REQ_META|READ_SYNC, bio);
+    bch_bbio_free(bio, b->c);
 
-    if (new->keys != v->sets[0].data->keys ||
-        memcmp(new->start,
-           v->sets[0].data->start,
-           (void *) end(new) - (void *) new->start)) {
-        unsigned i, j;
+    memcpy(ondisk, sorted, KEY_SIZE(&v->key) << 9);
+
+    bch_btree_node_read_done(v);
+    sorted = v->keys.set->data;
+
+    if (inmemory->keys != sorted->keys ||
+        memcmp(inmemory->start,
＀           sorted->start,
+           (void *) bset_bkey_last(inmemory) - (void *) inmemory->start)) {
+        struct bset *i;
+        unsigned j;
 
         console_lock();
 
-        printk(KERN_ERR "*** original memory node:\n");
-        for (i = 0; i <= b->nsets; i++)
-            dump_bset(b, b->sets[i].data);
+        printk(KERN_ERR "*** in memory:\n");
+        bch_dump_bset(&b->keys, inmemory, 0);
 
-        printk(KERN_ERR "*** sorted memory node:\n");
-        dump_bset(b, new);
+        printk(KERN_ERR "*** read back in:\n");
+        bch_dump_bset(&v->keys, sorted, 0);
 
-        printk(KERN_ERR "*** on disk node:\n");
-        dump_bset(v, v->sets[0].data);
+        for_each_written_bset(b, ondisk, i) {
+            unsigned block = ((void *) i - (void *) ondisk) /
+                block_bytes(b->c);
+
+            printk(KERN_ERR "*** on disk block %u:\n", block);
+            bch_dump_bset(&b->keys, i, block);
+        }
+
+        printk(KERN_ERR "*** block %zu not written\n",
+               ((void *) i - (void *) ondisk) / block_bytes(b->c));
 
-        for (j = 0; j < new->keys; j++)
-            if (new->d[j] != v->sets[0].data->d[j])
+        for (j = 0; j < inmemory->keys; j++)
+            if (inmemory->d[j] != sorted->d[j])
                 break;
 
+        printk(KERN_ERR "b->written %u\n", b->written);
+
         console_unlock();
         panic("verify failed at %u\n", j);
     }
 
     mutex_unlock(&b->c->verify_lock);
+    up(&b->io_mutex);
 }
 
 void bch_data_verify(struct cached_dev *dc, struct bio *bio)
@@ -207,74 +140,6 @@ void bch_data_verify(struct cached_dev *dc, struct bio *bio)
     bio_put(check);
 }
 
-int __bch_count_data(struct btree *b)
-{
-    unsigned ret = 0;
-    struct btree_iter iter;
-    struct bkey *k;
-
-    if (!b->level)
-        for_each_key(b, k, &iter)
-            ret += KEY_SIZE(k);
-    return ret;
-}
-
-void __bch_check_keys(struct btree *b, const char *fmt, ...)
-{
-    va_list args;
-    struct bkey *k, *p = NULL;
-    struct btree_iter iter;
-    const char *err;
-
-    for_each_key(b, k, &iter) {
-        if (!b->level) {
-            err = "Keys out of order";
-            if (p && bkey_cmp(&START_KEY(p), &START_KEY(k)) > 0)
-                goto bug;
-
-            if (bch_ptr_invalid(b, k))
-                continue;
-
-            err = "Overlapping keys";
-            if (p && bkey_cmp(p, &START_KEY(k)) > 0)
-                goto bug;
-        } else {
-            if (bch_ptr_bad(b, k))
-                continue;
-
-            err = "Duplicate keys";
-            if (p && !bkey_cmp(p, k))
-                goto bug;
-        }
-        p = k;
-    }
-
-    err = "Key larger than btree node key";
-    if (p && bkey_cmp(p, &b->key) > 0)
-        goto bug;
-
-    return;
-bug:
-    bch_dump_bucket(b);
-
-    va_start(args, fmt);
-    vprintk(fmt, args);
-    va_end(args);
-
-    panic("bcache error: %s:\n", err);
-}
-
-void bch_btree_iter_next_check(struct btree_iter *iter)
-{
-    struct bkey *k = iter->data->k, *next = bkey_next(k);
-
-    if (next < iter->data->end &&
-        bkey_cmp(k, iter->b->level ? next : &START_KEY(next)) > 0) {
-        bch_dump_bucket(iter->b);
-        panic("Key skipped backwards\n");
-    }
-}
-
 #endif
 
 #ifdef CONFIG_DEBUG_FS
@@ -321,7 +186,7 @@ static ssize_t bch_dump_read(struct file *file, char __user *buf,
         if (!w)
             break;
 
-        bch_bkey_to_text(kbuf, sizeof(kbuf), &w->key);
+        bch_extent_to_text(kbuf, sizeof(kbuf), &w->key);
         i->bytes = snprintf(i->buf, PAGE_SIZE, "%s\n", kbuf);
         bch_keybuf_del(&i->keys, w);
     }
 #ifndef _BCACHE_DEBUG_H
 #define _BCACHE_DEBUG_H
 
-/* Btree/bkey debug printing */
-
-int bch_bkey_to_text(char *buf, size_t size, const struct bkey *k);
+struct bio;
+struct cached_dev;
+struct cache_set;
 
 #ifdef CONFIG_BCACHE_DEBUG
 
-void bch_btree_verify(struct btree *, struct bset *);
+void bch_btree_verify(struct btree *);
 void bch_data_verify(struct cached_dev *, struct bio *);
-int __bch_count_data(struct btree *);
-void __bch_check_keys(struct btree *, const char *, ...);
-void bch_btree_iter_next_check(struct btree_iter *);
-
-#define EBUG_ON(cond)			BUG_ON(cond)
 
 #define expensive_debug_checks(c)	((c)->expensive_debug_checks)
 #define key_merging_disabled(c)		((c)->key_merging_disabled)
 #define bypass_torture_test(d)		((d)->bypass_torture_test)
 
 #else /* DEBUG */
 
-static inline void bch_btree_verify(struct btree *b, struct bset *i) {}
+static inline void bch_btree_verify(struct btree *b) {}
 static inline void bch_data_verify(struct cached_dev *dc, struct bio *bio) {}
-static inline int __bch_count_data(struct btree *b) { return -1; }
-static inline void __bch_check_keys(struct btree *b, const char *fmt, ...) {}
-static inline void bch_btree_iter_next_check(struct btree_iter *iter) {}
-
-#define EBUG_ON(cond)			do { if (cond); } while (0)
 
 #define expensive_debug_checks(c)	0
 #define key_merging_disabled(c)		0
 #define bypass_torture_test(d)		0
 
 #endif
 
-#define bch_count_data(b)						\
-    (expensive_debug_checks((b)->c) ? __bch_count_data(b) : -1)
-
-#define bch_check_keys(b, ...)						\
-do {									\
-    if (expensive_debug_checks((b)->c))				\
-        __bch_check_keys(b, __VA_ARGS__);			\
-} while (0)
-
 #ifdef CONFIG_DEBUG_FS
 void bch_debug_init_cache_set(struct cache_set *);
 #else
+#ifndef _BCACHE_EXTENTS_H
+#define _BCACHE_EXTENTS_H
+
+extern const struct btree_keys_ops bch_btree_keys_ops;
+extern const struct btree_keys_ops bch_extent_keys_ops;
+
+struct bkey;
+struct cache_set;
+
+void bch_extent_to_text(char *, size_t, const struct bkey *);
+bool __bch_btree_ptr_invalid(struct cache_set *, const struct bkey *);
+
+#endif /* _BCACHE_EXTENTS_H */
@@ -104,6 +104,7 @@ struct journal {
     /* used when waiting because the journal was full */
     struct closure_waitlist	wait;
     struct closure		io;
+    int				io_in_flight;
     struct delayed_work		work;
 
     /* Number of blocks free in the bucket(s) we're currently writing to */
@@ -211,7 +211,7 @@ void bch_moving_gc(struct cache_set *c)
     for_each_cache(ca, c, i) {
         unsigned sectors_to_move = 0;
         unsigned reserve_sectors = ca->sb.bucket_size *
-            min(fifo_used(&ca->free), ca->free.size / 2);
+            fifo_used(&ca->free[RESERVE_MOVINGGC]);
 
         ca->heap.used = 0;
@@ -6103,6 +6103,7 @@ static int run(struct mddev *mddev)
         blk_queue_io_min(mddev->queue, chunk_size);
         blk_queue_io_opt(mddev->queue, chunk_size *
                  (conf->raid_disks - conf->max_degraded));
+        mddev->queue->limits.raid_partial_stripes_expensive = 1;
         /*
          * We can only discard a whole stripe. It doesn't make sense to
          * discard data disk but write parity disk
@@ -1312,7 +1312,7 @@ static void bh_lru_install(struct buffer_head *bh)
         }
         while (out < BH_LRU_SIZE)
             bhs[out++] = NULL;
-        memcpy(__this_cpu_ptr(&bh_lrus.bhs), bhs, sizeof(bhs));
+        memcpy(this_cpu_ptr(&bh_lrus.bhs), bhs, sizeof(bhs));
     }
     bh_lru_unlock();