Commit 0d815e34 authored by Mikulas Patocka

dm-crypt: limit the size of encryption requests

There was a performance regression reported where dm-crypt would perform
worse on new kernels than on old kernels. The reason is that the old
kernels split the bios to NVMe request size (that is usually 65536 or
131072 bytes) and the new kernels pass the big bios through dm-crypt and
split them underneath.

If a big 1MiB bio is passed to dm-crypt, dm-crypt processes it on a single
core without parallelization and this is what causes the performance
degradation.

This commit introduces new tunable variables
/sys/module/dm_crypt/parameters/max_read_size and
/sys/module/dm_crypt/parameters/max_write_size that specify the maximum
bio size for dm-crypt. Bios larger than this value are split, so that
they can be encrypted in parallel by multiple cores. If these variables
are '0', a default 131072 is used.

Splitting bios may cause performance regressions in other workloads - if
this happens, the user should increase the value in max_read_size and
max_write_size variables.

max_read_size:
128k    2399MiB/s
256k    2368MiB/s
512k    1986MiB/s
1024k   1790MiB/s

max_write_size:
128k    1712MiB/s
256k    1651MiB/s
512k    1537MiB/s
1024k   1332MiB/s

Note that if you run dm-crypt inside a virtual machine, you may need to do
"echo numa >/sys/module/workqueue/parameters/default_affinity_scope" to
improve performance.
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Tested-by: Laurence Oberman <loberman@redhat.com>
parent 6fce1f40
...@@ -160,6 +160,17 @@ iv_large_sectors ...@@ -160,6 +160,17 @@ iv_large_sectors
The <iv_offset> must be a multiple of <sector_size> (in 512-byte units)
if this flag is specified.
Module parameters::
max_read_size
max_write_size
Maximum size of read or write requests, in bytes. When a request larger than
this size is received, dm-crypt will split the request. The splitting improves
concurrency (the split requests can be encrypted in parallel by multiple
cores), but it also causes overhead. The user should tune these parameters to
fit the actual workload. If a parameter is set to 0, a default of 131072 bytes
is used.
Example scripts Example scripts
=============== ===============
LUKS (Linux Unified Key Setup) is now the preferred way to set up disk LUKS (Linux Unified Key Setup) is now the preferred way to set up disk
......
...@@ -241,6 +241,31 @@ static unsigned int dm_crypt_clients_n; ...@@ -241,6 +241,31 @@ static unsigned int dm_crypt_clients_n;
static volatile unsigned long dm_crypt_pages_per_client; static volatile unsigned long dm_crypt_pages_per_client;
#define DM_CRYPT_MEMORY_PERCENT 2 #define DM_CRYPT_MEMORY_PERCENT 2
#define DM_CRYPT_MIN_PAGES_PER_CLIENT (BIO_MAX_VECS * 16) #define DM_CRYPT_MIN_PAGES_PER_CLIENT (BIO_MAX_VECS * 16)
/* Fallback limits, in bytes, used while the matching module parameter is 0. */
#define DM_CRYPT_DEFAULT_MAX_READ_SIZE 131072
#define DM_CRYPT_DEFAULT_MAX_WRITE_SIZE 131072

/*
 * Tunable upper bounds on the size of a single read / write request.
 * Both are exposed as module parameters with mode 0644 and start out as 0
 * (static storage is zero-initialised).
 */
static unsigned int max_read_size;
static unsigned int max_write_size;

module_param(max_read_size, uint, 0644);
MODULE_PARM_DESC(max_read_size, "Maximum size of a read request");
module_param(max_write_size, uint, 0644);
MODULE_PARM_DESC(max_write_size, "Maximum size of a write request");
/*
 * get_max_request_size - largest request dm-crypt accepts in one piece.
 * @cc:  crypt target configuration; supplies the underlying block device,
 *       the encryption sector size and the on-disk tag size.
 * @wrt: true for a write request, false for a read request.
 *
 * Starts from the max_write_size / max_read_size module parameter (or the
 * matching DM_CRYPT_DEFAULT_MAX_*_SIZE when the parameter is 0), caps the
 * value at BIO_MAX_VECS pages for writes and for devices with an on-disk
 * tag, and rounds it down to the larger of the device's logical block size
 * and cc->sector_size. The result is never smaller than that alignment.
 *
 * Return: the limit converted to sectors (value >> SECTOR_SHIFT).
 */
static unsigned get_max_request_size(struct crypt_config *cc, bool wrt)
{
	unsigned int limit, align;

	/* Snapshot the tunable once; it may be changed at runtime. */
	if (wrt) {
		limit = READ_ONCE(max_write_size);
		if (likely(limit == 0))
			limit = DM_CRYPT_DEFAULT_MAX_WRITE_SIZE;
	} else {
		limit = READ_ONCE(max_read_size);
		if (likely(limit == 0))
			limit = DM_CRYPT_DEFAULT_MAX_READ_SIZE;
	}

	/* Writes and tagged devices are additionally bounded by BIO_MAX_VECS pages. */
	if ((wrt || cc->on_disk_tag_size) &&
	    unlikely(limit > BIO_MAX_VECS << PAGE_SHIFT))
		limit = BIO_MAX_VECS << PAGE_SHIFT;

	/* Keep the limit a whole number of device blocks and crypto sectors. */
	align = max(bdev_logical_block_size(cc->dev->bdev), (unsigned)cc->sector_size);
	limit = round_down(limit, align);
	if (unlikely(limit == 0))
		limit = align;	/* tunable was below one aligned unit */

	return limit >> SECTOR_SHIFT;
}
static void crypt_endio(struct bio *clone); static void crypt_endio(struct bio *clone);
static void kcryptd_queue_crypt(struct dm_crypt_io *io); static void kcryptd_queue_crypt(struct dm_crypt_io *io);
...@@ -3474,6 +3499,7 @@ static int crypt_map(struct dm_target *ti, struct bio *bio) ...@@ -3474,6 +3499,7 @@ static int crypt_map(struct dm_target *ti, struct bio *bio)
{ {
struct dm_crypt_io *io; struct dm_crypt_io *io;
struct crypt_config *cc = ti->private; struct crypt_config *cc = ti->private;
unsigned max_sectors;
/* /*
* If bio is REQ_PREFLUSH or REQ_OP_DISCARD, just bypass crypt queues. * If bio is REQ_PREFLUSH or REQ_OP_DISCARD, just bypass crypt queues.
...@@ -3492,9 +3518,9 @@ static int crypt_map(struct dm_target *ti, struct bio *bio) ...@@ -3492,9 +3518,9 @@ static int crypt_map(struct dm_target *ti, struct bio *bio)
/* /*
* Check if bio is too large, split as needed. * Check if bio is too large, split as needed.
*/ */
if (unlikely(bio->bi_iter.bi_size > (BIO_MAX_VECS << PAGE_SHIFT)) && max_sectors = get_max_request_size(cc, bio_data_dir(bio) == WRITE);
(bio_data_dir(bio) == WRITE || cc->on_disk_tag_size)) if (unlikely(bio_sectors(bio) > max_sectors))
dm_accept_partial_bio(bio, ((BIO_MAX_VECS << PAGE_SHIFT) >> SECTOR_SHIFT)); dm_accept_partial_bio(bio, max_sectors);
/* /*
* Ensure that bio is a multiple of internal sector encryption size * Ensure that bio is a multiple of internal sector encryption size
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment