Commit dcd19507 authored by Mikulas Patocka, committed by Mike Snitzer

dm writecache: improve performance of large linear writes on SSDs

When dm-writecache is used with an SSD as the cache device, it submits a
separate bio for each written block. The I/Os are merged by the disk
scheduler, but this merging degrades performance.

Improve dm-writecache performance by submitting larger bios; this is
possible as long as there is consecutive free space on the cache
device.
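
To illustrate the idea (an editor's sketch, not code from the patch):
the change amounts to a greedy coalescing pass. After claiming the
first free block, keep claiming the next free block as long as it is
physically adjacent on the cache device and the incoming write still
has payload left, then submit one large bio for the whole run. A
self-contained user-space model, with a toy bitmap standing in for the
driver's freelist:

#include <stdbool.h>
#include <stdio.h>

/* Toy model: one flag per cache block, true = free.  The real driver
 * tracks free blocks as struct wc_entry in an rbtree or list. */
#define NBLOCKS 16
static bool free_map[NBLOCKS] = {
	[2] = true, [3] = true, [4] = true, [7] = true,
};

/* Claim block 'idx' only if it is free; mirrors the "expected
 * sector" check the patch adds to writecache_pop_from_freelist(). */
static bool pop_free_if_at(unsigned idx)
{
	if (idx >= NBLOCKS || !free_map[idx])
		return false;
	free_map[idx] = false;
	return true;
}

/* Greedy coalescing: block 'start' is already claimed; keep claiming
 * adjacent free blocks while the write still has payload.  Returns
 * how many blocks one large bio would cover. */
static unsigned coalesce_run(unsigned start, unsigned blocks_wanted)
{
	unsigned run = 1;

	while (run < blocks_wanted && pop_free_if_at(start + run))
		run++;
	return run;
}

int main(void)
{
	free_map[2] = false;		/* first block already claimed */
	/* An 8-block write landing at block 2 coalesces blocks 2..4;
	 * block 5 is not free, so one 3-block bio is submitted and the
	 * caller loops to place the remaining blocks elsewhere. */
	printf("coalesced %u blocks\n", coalesce_run(2, 8));
	return 0;
}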

Benchmark (arm64 with 64k page size, using /dev/ram0 as a cache device):

fio --bs=512k --iodepth=32 --size=400M --direct=1 \
    --filename=/dev/mapper/cache --rw=randwrite --numjobs=1 --name=test

block size	old MiB/s	new MiB/s
---------------------------------
512	181	700
1k	347	1256
2k	644	2020
4k	1183	2759
8k	1852	3333
16k	2469	3509
32k	2974	3670
64k	3404	3810
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
parent be240ff5
@@ -625,7 +625,7 @@ static void writecache_add_to_freelist(struct dm_writecache *wc, struct wc_entry
 	wc->freelist_size++;
 }
 
-static struct wc_entry *writecache_pop_from_freelist(struct dm_writecache *wc)
+static struct wc_entry *writecache_pop_from_freelist(struct dm_writecache *wc, sector_t expected_sector)
 {
 	struct wc_entry *e;
 
@@ -634,6 +634,8 @@ static struct wc_entry *writecache_pop_from_freelist(struct dm_writecache *wc)
 		if (unlikely(!wc->current_free))
 			return NULL;
 		e = wc->current_free;
+		if (expected_sector != (sector_t)-1 && unlikely(cache_sector(wc, e) != expected_sector))
+			return NULL;
 		next = rb_next(&e->rb_node);
 		rb_erase(&e->rb_node, &wc->freetree);
 		if (unlikely(!next))
@@ -643,6 +645,8 @@ static struct wc_entry *writecache_pop_from_freelist(struct dm_writecache *wc)
 		if (unlikely(list_empty(&wc->freelist)))
 			return NULL;
 		e = container_of(wc->freelist.next, struct wc_entry, lru);
+		if (expected_sector != (sector_t)-1 && unlikely(cache_sector(wc, e) != expected_sector))
+			return NULL;
 		list_del(&e->lru);
 	}
 	wc->freelist_size--;
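
The two hunks above give writecache_pop_from_freelist() an
expected_sector argument. Note the design choice: the function only
looks at the block at the head of the freelist (wc->current_free when
the freelist is kept sorted in an rbtree, wc->freelist.next otherwise)
and refuses rather than searching for a match, so the allocation path
stays O(1); a non-adjacent free block simply terminates the run and
the remainder of the write starts a new bio. A minimal sketch of the
calling convention (editor's user-space stand-ins; ANY_SECTOR is an
illustrative name for the driver's (sector_t)-1 sentinel):

#include <stddef.h>
#include <stdint.h>

typedef uint64_t sector_t;		/* stand-in for the kernel type */

#define ANY_SECTOR ((sector_t)-1)	/* "no placement constraint" */

struct entry {
	sector_t sector;		/* cache sector of this free block */
};

/* Simplified model of writecache_pop_from_freelist(): hand out the
 * head of the freelist, but refuse if the caller asked for a specific
 * cache sector and the head lives somewhere else. */
static struct entry *pop_from_freelist(struct entry *head,
				       sector_t expected_sector)
{
	if (!head)
		return NULL;
	if (expected_sector != ANY_SECTOR && head->sector != expected_sector)
		return NULL;
	return head;
}

int main(void)
{
	struct entry head = { .sector = 2048 };

	/* First block of a write: any free block will do. */
	if (!pop_from_freelist(&head, ANY_SECTOR))
		return 1;
	/* Follow-on block: must sit exactly at the requested sector. */
	return pop_from_freelist(&head, 2048) ? 0 : 1;
}
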
@@ -1193,7 +1197,7 @@ static int writecache_map(struct dm_target *ti, struct bio *bio)
 				goto bio_copy;
 			}
 		}
-		e = writecache_pop_from_freelist(wc);
+		e = writecache_pop_from_freelist(wc, (sector_t)-1);
 		if (unlikely(!e)) {
 			writecache_wait_on_freelist(wc);
 			continue;
@@ -1205,9 +1209,26 @@ static int writecache_map(struct dm_target *ti, struct bio *bio)
 		if (WC_MODE_PMEM(wc)) {
 			bio_copy_block(wc, bio, memory_data(wc, e));
 		} else {
-			dm_accept_partial_bio(bio, wc->block_size >> SECTOR_SHIFT);
+			unsigned bio_size = wc->block_size;
+			sector_t start_cache_sec = cache_sector(wc, e);
+			sector_t current_cache_sec = start_cache_sec + (bio_size >> SECTOR_SHIFT);
+
+			while (bio_size < bio->bi_iter.bi_size) {
+				struct wc_entry *f = writecache_pop_from_freelist(wc, current_cache_sec);
+				if (!f)
+					break;
+				write_original_sector_seq_count(wc, f, bio->bi_iter.bi_sector +
+								(bio_size >> SECTOR_SHIFT), wc->seq_count);
+				writecache_insert_entry(wc, f);
+				wc->uncommitted_blocks++;
+				bio_size += wc->block_size;
+				current_cache_sec += wc->block_size >> SECTOR_SHIFT;
+			}
+
 			bio_set_dev(bio, wc->ssd_dev->bdev);
-			bio->bi_iter.bi_sector = cache_sector(wc, e);
+			bio->bi_iter.bi_sector = start_cache_sec;
+			dm_accept_partial_bio(bio, bio_size >> SECTOR_SHIFT);
 			if (unlikely(wc->uncommitted_blocks >= wc->autocommit_blocks)) {
 				wc->uncommitted_blocks = 0;
 				queue_work(wc->writeback_wq, &wc->flush_work);
...
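
For concreteness, the arithmetic in the new while loop (an editor's
worked example, assuming a 4k cache block size; SECTOR_SHIFT is 9,
i.e. 512-byte sectors): each follow-on block that pops successfully
advances bio_size by 4096 bytes and current_cache_sec by 8 sectors, so
a fully contiguous 512 KiB bio absorbs 127 extra blocks after the
first, and dm_accept_partial_bio() then accepts all 1024 sectors as
one write:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define SECTOR_SHIFT 9			/* 512-byte sectors */

int main(void)
{
	uint32_t block_size = 4096;	/* assumed cache block size */
	uint32_t bi_size = 512 * 1024;	/* one 512 KiB incoming bio */
	uint32_t bio_size = block_size;	/* first block already claimed */
	uint64_t current_cache_sec = 8;	/* arbitrary start + one block */
	unsigned extra = 0;

	/* Same loop bounds as the patch, assuming every follow-on
	 * block pops from the freelist (fully contiguous free space). */
	while (bio_size < bi_size) {
		extra++;
		bio_size += block_size;
		current_cache_sec += block_size >> SECTOR_SHIFT;
	}

	assert(extra == 127);		/* 128 blocks total: 1 + 127 */
	printf("one bio covers %u sectors\n", bio_size >> SECTOR_SHIFT);
	return 0;
}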