Commit 0364249d authored by Linus Torvalds

Merge tag 'for-6.7/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm

Pull device mapper updates from Mike Snitzer:

 - Update DM core to directly call the map function for both the linear
   and stripe targets, which are provided by DM core

 - Various updates to use new safer string functions

 - Update DM core to respect REQ_NOWAIT flag in normal bios so that
   memory allocations are always attempted with GFP_NOWAIT

 - Add Mikulas Patocka to MAINTAINERS as a DM maintainer!

 - Improve DM delay target's handling of short delays (< 50ms) by using
   a kthread to check expiration of IOs rather than timers and a wq

 - Update the DM error target so that it works with zoned storage. This
   helps xfstests to provide proper IO error handling coverage when
   testing a filesystem with native zoned storage support

 - Update both DM crypt and integrity targets to improve performance by
   using crypto_shash_digest() rather than init+update+final sequence

 - Fix DM crypt target by backfilling missing memory allocation
   accounting for compound pages

* tag 'for-6.7/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm:
  dm crypt: account large pages in cc->n_allocated_pages
  dm integrity: use crypto_shash_digest() in sb_mac()
  dm crypt: use crypto_shash_digest() in crypt_iv_tcw_whitening()
  dm error: Add support for zoned block devices
  dm delay: for short delays, use kthread instead of timers and wq
  MAINTAINERS: add Mikulas Patocka as a DM maintainer
  dm: respect REQ_NOWAIT flag in normal bios issued to DM
  dm: enhance alloc_multiple_bios() to be more versatile
  dm: make __send_duplicate_bios return unsigned int
  dm log userspace: replace deprecated strncpy with strscpy
  dm ioctl: replace deprecated strncpy with strscpy_pad
  dm crypt: replace open-coded kmemdup_nul
  dm cache metadata: replace deprecated strncpy with strscpy
  dm: shortcut the calls to linear_map and stripe_map
parents 39714efc 9793c269
@@ -6028,6 +6028,7 @@ F: include/linux/devm-helpers.h
 DEVICE-MAPPER (LVM)
 M: Alasdair Kergon <agk@redhat.com>
 M: Mike Snitzer <snitzer@kernel.org>
+M: Mikulas Patocka <mpatocka@redhat.com>
 M: dm-devel@lists.linux.dev
 L: dm-devel@lists.linux.dev
 S: Maintained
......
@@ -597,7 +597,7 @@ static void read_superblock_fields(struct dm_cache_metadata *cmd,
 	cmd->discard_nr_blocks = to_dblock(le64_to_cpu(disk_super->discard_nr_blocks));
 	cmd->data_block_size = le32_to_cpu(disk_super->data_block_size);
 	cmd->cache_blocks = to_cblock(le32_to_cpu(disk_super->cache_blocks));
-	strncpy(cmd->policy_name, disk_super->policy_name, sizeof(cmd->policy_name));
+	strscpy(cmd->policy_name, disk_super->policy_name, sizeof(cmd->policy_name));
 	cmd->policy_version[0] = le32_to_cpu(disk_super->policy_version[0]);
 	cmd->policy_version[1] = le32_to_cpu(disk_super->policy_version[1]);
 	cmd->policy_version[2] = le32_to_cpu(disk_super->policy_version[2]);
@@ -707,7 +707,7 @@ static int __commit_transaction(struct dm_cache_metadata *cmd,
 	disk_super->discard_block_size = cpu_to_le64(cmd->discard_block_size);
 	disk_super->discard_nr_blocks = cpu_to_le64(from_dblock(cmd->discard_nr_blocks));
 	disk_super->cache_blocks = cpu_to_le32(from_cblock(cmd->cache_blocks));
-	strncpy(disk_super->policy_name, cmd->policy_name, sizeof(disk_super->policy_name));
+	strscpy(disk_super->policy_name, cmd->policy_name, sizeof(disk_super->policy_name));
 	disk_super->policy_version[0] = cpu_to_le32(cmd->policy_version[0]);
 	disk_super->policy_version[1] = cpu_to_le32(cmd->policy_version[1]);
 	disk_super->policy_version[2] = cpu_to_le32(cmd->policy_version[2]);
@@ -1726,7 +1726,7 @@ static int write_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *po
 	    (strlen(policy_name) > sizeof(cmd->policy_name) - 1))
 		return -EINVAL;
-	strncpy(cmd->policy_name, policy_name, sizeof(cmd->policy_name));
+	strscpy(cmd->policy_name, policy_name, sizeof(cmd->policy_name));
 	memcpy(cmd->policy_version, policy_version, sizeof(cmd->policy_version));
 	hint_size = dm_cache_policy_get_hint_size(policy);
......
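The strncpy to strscpy conversions above (and the strscpy_pad and kmemdup_nul changes later in this series) share one motivation: strncpy() does not guarantee NUL termination and gives no indication of truncation. Below is a minimal sketch of the preferred pattern; the struct and function names are invented for illustration and are not part of the DM code.

```c
#include <linux/string.h>
#include <linux/errno.h>

/* Hypothetical config struct, only for illustration. */
struct example_cfg {
	char policy_name[32];
};

static int example_set_policy(struct example_cfg *cfg, const char *name)
{
	ssize_t n;

	/*
	 * strscpy() always NUL-terminates the destination and returns the
	 * number of bytes copied, or -E2BIG if the source string did not
	 * fit; strncpy() offers neither guarantee.
	 */
	n = strscpy(cfg->policy_name, name, sizeof(cfg->policy_name));
	if (n < 0)
		return n;

	return 0;
}
```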
@@ -652,13 +652,7 @@ static int crypt_iv_tcw_whitening(struct crypt_config *cc,
 	/* calculate crc32 for every 32bit part and xor it */
 	desc->tfm = tcw->crc32_tfm;
 	for (i = 0; i < 4; i++) {
-		r = crypto_shash_init(desc);
-		if (r)
-			goto out;
-		r = crypto_shash_update(desc, &buf[i * 4], 4);
-		if (r)
-			goto out;
-		r = crypto_shash_final(desc, &buf[i * 4]);
+		r = crypto_shash_digest(desc, &buf[i * 4], 4, &buf[i * 4]);
 		if (r)
 			goto out;
 	}
@@ -1699,11 +1693,17 @@ static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned int size)
 	order = min(order, remaining_order);
 
 	while (order > 0) {
+		if (unlikely(percpu_counter_read_positive(&cc->n_allocated_pages) +
+			     (1 << order) > dm_crypt_pages_per_client))
+			goto decrease_order;
 		pages = alloc_pages(gfp_mask
 			| __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN | __GFP_COMP,
 			order);
-		if (likely(pages != NULL))
+		if (likely(pages != NULL)) {
+			percpu_counter_add(&cc->n_allocated_pages, 1 << order);
 			goto have_pages;
+		}
+decrease_order:
 		order--;
 	}
@@ -1741,10 +1741,13 @@ static void crypt_free_buffer_pages(struct crypt_config *cc, struct bio *clone)
 	if (clone->bi_vcnt > 0) { /* bio_for_each_folio_all crashes with an empty bio */
 		bio_for_each_folio_all(fi, clone) {
-			if (folio_test_large(fi.folio))
+			if (folio_test_large(fi.folio)) {
+				percpu_counter_sub(&cc->n_allocated_pages,
+						1 << folio_order(fi.folio));
 				folio_put(fi.folio);
-			else
+			} else {
 				mempool_free(&fi.folio->page, &cc->page_pool);
+			}
 		}
 	}
 }
@@ -2859,10 +2862,9 @@ static int crypt_ctr_auth_cipher(struct crypt_config *cc, char *cipher_api)
 	if (!start || !end || ++start > end)
 		return -EINVAL;
 
-	mac_alg = kzalloc(end - start + 1, GFP_KERNEL);
+	mac_alg = kmemdup_nul(start, end - start, GFP_KERNEL);
 	if (!mac_alg)
 		return -ENOMEM;
-	strncpy(mac_alg, start, end - start);
 
 	mac = crypto_alloc_ahash(mac_alg, 0, CRYPTO_ALG_ALLOCATES_MEMORY);
 	kfree(mac_alg);
......
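The crypt_iv_tcw_whitening() change above (and the sb_mac() change in dm-integrity below) replaces a crypto_shash_init()/update()/final() sequence with a single crypto_shash_digest() call, which is shorter and lets the crypto API take its one-shot path. A hedged sketch of that pattern follows; it assumes a caller that already allocated a synchronous hash transform with crypto_alloc_shash(), and nothing in it is taken verbatim from the patches above.

```c
#include <crypto/hash.h>

/*
 * One-shot hashing of a single contiguous buffer. "tfm" is assumed to be a
 * valid struct crypto_shash * obtained elsewhere.
 */
static int example_digest(struct crypto_shash *tfm,
			  const u8 *data, unsigned int len, u8 *out)
{
	SHASH_DESC_ON_STACK(desc, tfm);

	desc->tfm = tfm;

	/* Equivalent to crypto_shash_init() + _update() + _final(). */
	return crypto_shash_digest(desc, data, len, out);
}
```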
@@ -13,6 +13,7 @@
 #include <linux/blkdev.h>
 #include <linux/bio.h>
 #include <linux/slab.h>
+#include <linux/kthread.h>
 
 #include <linux/device-mapper.h>
@@ -31,6 +32,7 @@ struct delay_c {
 	struct workqueue_struct *kdelayd_wq;
 	struct work_struct flush_expired_bios;
 	struct list_head delayed_bios;
+	struct task_struct *worker;
 	atomic_t may_delay;
 
 	struct delay_class read;
@@ -66,6 +68,44 @@ static void queue_timeout(struct delay_c *dc, unsigned long expires)
 	mutex_unlock(&dc->timer_lock);
 }
 
+static inline bool delay_is_fast(struct delay_c *dc)
+{
+	return !!dc->worker;
+}
+
+static void flush_delayed_bios_fast(struct delay_c *dc, bool flush_all)
+{
+	struct dm_delay_info *delayed, *next;
+
+	mutex_lock(&delayed_bios_lock);
+	list_for_each_entry_safe(delayed, next, &dc->delayed_bios, list) {
+		if (flush_all || time_after_eq(jiffies, delayed->expires)) {
+			struct bio *bio = dm_bio_from_per_bio_data(delayed,
+						sizeof(struct dm_delay_info));
+			list_del(&delayed->list);
+			dm_submit_bio_remap(bio, NULL);
+			delayed->class->ops--;
+		}
+	}
+	mutex_unlock(&delayed_bios_lock);
+}
+
+static int flush_worker_fn(void *data)
+{
+	struct delay_c *dc = data;
+
+	while (1) {
+		flush_delayed_bios_fast(dc, false);
+		if (unlikely(list_empty(&dc->delayed_bios))) {
+			set_current_state(TASK_INTERRUPTIBLE);
+			schedule();
+		} else
+			cond_resched();
+	}
+
+	return 0;
+}
+
 static void flush_bios(struct bio *bio)
 {
 	struct bio *n;
@@ -78,7 +118,7 @@ static void flush_bios(struct bio *bio)
 	}
 }
 
-static struct bio *flush_delayed_bios(struct delay_c *dc, int flush_all)
+static struct bio *flush_delayed_bios(struct delay_c *dc, bool flush_all)
 {
 	struct dm_delay_info *delayed, *next;
 	unsigned long next_expires = 0;
@@ -115,7 +155,10 @@ static void flush_expired_bios(struct work_struct *work)
 	struct delay_c *dc;
 
 	dc = container_of(work, struct delay_c, flush_expired_bios);
-	flush_bios(flush_delayed_bios(dc, 0));
+	if (delay_is_fast(dc))
+		flush_delayed_bios_fast(dc, false);
+	else
+		flush_bios(flush_delayed_bios(dc, false));
 }
 
 static void delay_dtr(struct dm_target *ti)
@@ -131,8 +174,11 @@ static void delay_dtr(struct dm_target *ti)
 	dm_put_device(ti, dc->write.dev);
 	if (dc->flush.dev)
 		dm_put_device(ti, dc->flush.dev);
+	if (dc->worker)
+		kthread_stop(dc->worker);
 
-	mutex_destroy(&dc->timer_lock);
+	if (!delay_is_fast(dc))
+		mutex_destroy(&dc->timer_lock);
 
 	kfree(dc);
 }
@@ -175,6 +221,7 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 {
 	struct delay_c *dc;
 	int ret;
+	unsigned int max_delay;
 
 	if (argc != 3 && argc != 6 && argc != 9) {
 		ti->error = "Requires exactly 3, 6 or 9 arguments";
@@ -188,16 +235,14 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	}
 
 	ti->private = dc;
-	timer_setup(&dc->delay_timer, handle_delayed_timer, 0);
-	INIT_WORK(&dc->flush_expired_bios, flush_expired_bios);
 	INIT_LIST_HEAD(&dc->delayed_bios);
-	mutex_init(&dc->timer_lock);
 	atomic_set(&dc->may_delay, 1);
 	dc->argc = argc;
 
 	ret = delay_class_ctr(ti, &dc->read, argv);
 	if (ret)
 		goto bad;
+	max_delay = dc->read.delay;
 
 	if (argc == 3) {
 		ret = delay_class_ctr(ti, &dc->write, argv);
@@ -206,6 +251,8 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		ret = delay_class_ctr(ti, &dc->flush, argv);
 		if (ret)
 			goto bad;
+		max_delay = max(max_delay, dc->write.delay);
+		max_delay = max(max_delay, dc->flush.delay);
 		goto out;
 	}
@@ -216,19 +263,37 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		ret = delay_class_ctr(ti, &dc->flush, argv + 3);
 		if (ret)
 			goto bad;
+		max_delay = max(max_delay, dc->flush.delay);
 		goto out;
 	}
 
 	ret = delay_class_ctr(ti, &dc->flush, argv + 6);
 	if (ret)
 		goto bad;
+	max_delay = max(max_delay, dc->flush.delay);
 
 out:
-	dc->kdelayd_wq = alloc_workqueue("kdelayd", WQ_MEM_RECLAIM, 0);
-	if (!dc->kdelayd_wq) {
-		ret = -EINVAL;
-		DMERR("Couldn't start kdelayd");
-		goto bad;
+	if (max_delay < 50) {
+		/*
+		 * In case of small requested delays, use kthread instead of
+		 * timers and workqueue to achieve better latency.
+		 */
+		dc->worker = kthread_create(&flush_worker_fn, dc,
+					    "dm-delay-flush-worker");
+		if (IS_ERR(dc->worker)) {
+			ret = PTR_ERR(dc->worker);
+			goto bad;
+		}
+	} else {
+		timer_setup(&dc->delay_timer, handle_delayed_timer, 0);
+		INIT_WORK(&dc->flush_expired_bios, flush_expired_bios);
+		mutex_init(&dc->timer_lock);
+		dc->kdelayd_wq = alloc_workqueue("kdelayd", WQ_MEM_RECLAIM, 0);
+		if (!dc->kdelayd_wq) {
+			ret = -EINVAL;
+			DMERR("Couldn't start kdelayd");
+			goto bad;
+		}
 	}
 
 	ti->num_flush_bios = 1;
@@ -260,7 +325,10 @@ static int delay_bio(struct delay_c *dc, struct delay_class *c, struct bio *bio)
 	list_add_tail(&delayed->list, &dc->delayed_bios);
 	mutex_unlock(&delayed_bios_lock);
 
-	queue_timeout(dc, expires);
+	if (delay_is_fast(dc))
+		wake_up_process(dc->worker);
+	else
+		queue_timeout(dc, expires);
 
 	return DM_MAPIO_SUBMITTED;
 }
@@ -270,8 +338,13 @@ static void delay_presuspend(struct dm_target *ti)
 	struct delay_c *dc = ti->private;
 
 	atomic_set(&dc->may_delay, 0);
-	del_timer_sync(&dc->delay_timer);
-	flush_bios(flush_delayed_bios(dc, 1));
+
+	if (delay_is_fast(dc))
+		flush_delayed_bios_fast(dc, true);
+	else {
+		del_timer_sync(&dc->delay_timer);
+		flush_bios(flush_delayed_bios(dc, true));
+	}
 }
 
 static void delay_resume(struct dm_target *ti)
@@ -356,7 +429,7 @@ static int delay_iterate_devices(struct dm_target *ti,
 static struct target_type delay_target = {
 	.name = "delay",
-	.version = {1, 3, 0},
+	.version = {1, 4, 0},
 	.features = DM_TARGET_PASSES_INTEGRITY,
 	.module = THIS_MODULE,
 	.ctr = delay_ctr,
......
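The dm-delay rework above swaps the timer plus workqueue machinery for a dedicated kthread when every configured delay is under 50 ms, trading a little CPU for lower wake-up latency. The worker follows the usual kthread pattern: sleep until a producer calls wake_up_process(), otherwise keep draining work with cond_resched() in between. A generic, simplified sketch of that pattern (the context struct and the atomic "pending" counter are placeholders, not dm-delay symbols):

```c
#include <linux/kthread.h>
#include <linux/sched.h>
#include <linux/atomic.h>

struct example_worker_ctx {
	atomic_t pending;	/* number of queued work items */
};

static int example_worker_fn(void *data)
{
	struct example_worker_ctx *ctx = data;

	while (!kthread_should_stop()) {
		/* Set state before checking for work to avoid missing a wake-up. */
		set_current_state(TASK_INTERRUPTIBLE);
		if (!atomic_read(&ctx->pending)) {
			schedule();	/* sleep until wake_up_process() */
			continue;
		}
		__set_current_state(TASK_RUNNING);

		atomic_dec(&ctx->pending);	/* "process" one item */
		cond_resched();
	}
	return 0;
}
```

The producer side would be kthread_create(example_worker_fn, &ctx, "example-worker") at setup, wake_up_process() after queueing work, and kthread_stop() on teardown, mirroring what delay_ctr(), delay_bio() and delay_dtr() do above.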
@@ -493,42 +493,32 @@ static int sb_mac(struct dm_integrity_c *ic, bool wr)
 {
 	SHASH_DESC_ON_STACK(desc, ic->journal_mac);
 	int r;
-	unsigned int size = crypto_shash_digestsize(ic->journal_mac);
+	unsigned int mac_size = crypto_shash_digestsize(ic->journal_mac);
+	__u8 *sb = (__u8 *)ic->sb;
+	__u8 *mac = sb + (1 << SECTOR_SHIFT) - mac_size;
 
-	if (sizeof(struct superblock) + size > 1 << SECTOR_SHIFT) {
+	if (sizeof(struct superblock) + mac_size > 1 << SECTOR_SHIFT) {
 		dm_integrity_io_error(ic, "digest is too long", -EINVAL);
 		return -EINVAL;
 	}
 
 	desc->tfm = ic->journal_mac;
 
-	r = crypto_shash_init(desc);
-	if (unlikely(r < 0)) {
-		dm_integrity_io_error(ic, "crypto_shash_init", r);
-		return r;
-	}
-	r = crypto_shash_update(desc, (__u8 *)ic->sb, (1 << SECTOR_SHIFT) - size);
-	if (unlikely(r < 0)) {
-		dm_integrity_io_error(ic, "crypto_shash_update", r);
-		return r;
-	}
-
 	if (likely(wr)) {
-		r = crypto_shash_final(desc, (__u8 *)ic->sb + (1 << SECTOR_SHIFT) - size);
+		r = crypto_shash_digest(desc, sb, mac - sb, mac);
 		if (unlikely(r < 0)) {
-			dm_integrity_io_error(ic, "crypto_shash_final", r);
+			dm_integrity_io_error(ic, "crypto_shash_digest", r);
 			return r;
 		}
 	} else {
-		__u8 result[HASH_MAX_DIGESTSIZE];
+		__u8 actual_mac[HASH_MAX_DIGESTSIZE];
 
-		r = crypto_shash_final(desc, result);
+		r = crypto_shash_digest(desc, sb, mac - sb, actual_mac);
 		if (unlikely(r < 0)) {
-			dm_integrity_io_error(ic, "crypto_shash_final", r);
+			dm_integrity_io_error(ic, "crypto_shash_digest", r);
 			return r;
 		}
-		if (memcmp((__u8 *)ic->sb + (1 << SECTOR_SHIFT) - size, result, size)) {
+		if (memcmp(mac, actual_mac, mac_size)) {
 			dm_integrity_io_error(ic, "superblock mac", -EILSEQ);
 			dm_audit_log_target(DM_MSG_PREFIX, "mac-superblock", ic->ti, 0);
 			return -EILSEQ;
......
@@ -1295,8 +1295,8 @@ static void retrieve_status(struct dm_table *table,
 		spec->status = 0;
 		spec->sector_start = ti->begin;
 		spec->length = ti->len;
-		strncpy(spec->target_type, ti->type->name,
-			sizeof(spec->target_type) - 1);
+		strscpy_pad(spec->target_type, ti->type->name,
+			    sizeof(spec->target_type));
 
 		outptr += sizeof(struct dm_target_spec);
 		remaining = len - (outptr - outbuf);
......
@@ -85,7 +85,7 @@ static sector_t linear_map_sector(struct dm_target *ti, sector_t bi_sector)
 	return lc->start + dm_target_offset(ti, bi_sector);
 }
 
-static int linear_map(struct dm_target *ti, struct bio *bio)
+int linear_map(struct dm_target *ti, struct bio *bio)
 {
 	struct linear_c *lc = ti->private;
......
@@ -224,7 +224,7 @@ static int userspace_ctr(struct dm_dirty_log *log, struct dm_target *ti,
 	lc->usr_argc = argc;
 
-	strncpy(lc->uuid, argv[0], DM_UUID_LEN);
+	strscpy(lc->uuid, argv[0], sizeof(lc->uuid));
 	argc--;
 	argv++;
 	spin_lock_init(&lc->flush_lock);
......
@@ -268,7 +268,7 @@ static int stripe_map_range(struct stripe_c *sc, struct bio *bio,
 	return DM_MAPIO_SUBMITTED;
 }
 
-static int stripe_map(struct dm_target *ti, struct bio *bio)
+int stripe_map(struct dm_target *ti, struct bio *bio)
 {
 	struct stripe_c *sc = ti->private;
 	uint32_t stripe;
......
@@ -844,7 +844,8 @@ static bool dm_table_supports_dax(struct dm_table *t,
 		if (!ti->type->direct_access)
 			return false;
 
-		if (!ti->type->iterate_devices ||
+		if (dm_target_is_wildcard(ti->type) ||
+		    !ti->type->iterate_devices ||
 		    ti->type->iterate_devices(ti, iterate_fn, NULL))
 			return false;
 	}
@@ -1587,6 +1588,14 @@ static int device_not_zoned_model(struct dm_target *ti, struct dm_dev *dev,
 	return blk_queue_zoned_model(q) != *zoned_model;
 }
 
+static int device_is_zoned_model(struct dm_target *ti, struct dm_dev *dev,
+				 sector_t start, sector_t len, void *data)
+{
+	struct request_queue *q = bdev_get_queue(dev->bdev);
+
+	return blk_queue_zoned_model(q) != BLK_ZONED_NONE;
+}
+
 /*
  * Check the device zoned model based on the target feature flag. If the target
  * has the DM_TARGET_ZONED_HM feature flag set, host-managed zoned devices are
@@ -1600,6 +1609,18 @@ static bool dm_table_supports_zoned_model(struct dm_table *t,
 	for (unsigned int i = 0; i < t->num_targets; i++) {
 		struct dm_target *ti = dm_table_get_target(t, i);
 
+		/*
+		 * For the wildcard target (dm-error), if we do not have a
+		 * backing device, we must always return false. If we have a
+		 * backing device, the result must depend on checking zoned
+		 * model, like for any other target. So for this, check directly
+		 * if the target backing device is zoned as we get "false" when
+		 * dm-error was set without a backing device.
+		 */
+		if (dm_target_is_wildcard(ti->type) &&
+		    !ti->type->iterate_devices(ti, device_is_zoned_model, NULL))
+			return false;
+
 		if (dm_target_supports_zoned_hm(ti->type)) {
 			if (!ti->type->iterate_devices ||
 			    ti->type->iterate_devices(ti, device_not_zoned_model,
......
@@ -116,8 +116,62 @@ EXPORT_SYMBOL(dm_unregister_target);
  * io-err: always fails an io, useful for bringing
  * up LVs that have holes in them.
  */
+struct io_err_c {
+	struct dm_dev *dev;
+	sector_t start;
+};
+
+static int io_err_get_args(struct dm_target *tt, unsigned int argc, char **args)
+{
+	unsigned long long start;
+	struct io_err_c *ioec;
+	char dummy;
+	int ret;
+
+	ioec = kmalloc(sizeof(*ioec), GFP_KERNEL);
+	if (!ioec) {
+		tt->error = "Cannot allocate io_err context";
+		return -ENOMEM;
+	}
+
+	ret = -EINVAL;
+	if (sscanf(args[1], "%llu%c", &start, &dummy) != 1 ||
+	    start != (sector_t)start) {
+		tt->error = "Invalid device sector";
+		goto bad;
+	}
+	ioec->start = start;
+
+	ret = dm_get_device(tt, args[0], dm_table_get_mode(tt->table), &ioec->dev);
+	if (ret) {
+		tt->error = "Device lookup failed";
+		goto bad;
+	}
+
+	tt->private = ioec;
+	return 0;
+
+bad:
+	kfree(ioec);
+	return ret;
+}
+
 static int io_err_ctr(struct dm_target *tt, unsigned int argc, char **args)
 {
+	/*
+	 * If we have arguments, assume it is the path to the backing
+	 * block device and its mapping start sector (same as dm-linear).
+	 * In this case, get the device so that we can get its limits.
+	 */
+	if (argc == 2) {
+		int ret = io_err_get_args(tt, argc, args);
+
+		if (ret)
+			return ret;
+	}
+
 	/*
 	 * Return error for discards instead of -EOPNOTSUPP
 	 */
@@ -129,7 +183,12 @@ static int io_err_ctr(struct dm_target *tt, unsigned int argc, char **args)
 static void io_err_dtr(struct dm_target *tt)
 {
-	/* empty */
+	struct io_err_c *ioec = tt->private;
+
+	if (ioec) {
+		dm_put_device(tt, ioec->dev);
+		kfree(ioec);
+	}
 }
 
 static int io_err_map(struct dm_target *tt, struct bio *bio)
@@ -149,6 +208,45 @@ static void io_err_release_clone_rq(struct request *clone,
 {
 }
 
+#ifdef CONFIG_BLK_DEV_ZONED
+static sector_t io_err_map_sector(struct dm_target *ti, sector_t bi_sector)
+{
+	struct io_err_c *ioec = ti->private;
+
+	return ioec->start + dm_target_offset(ti, bi_sector);
+}
+
+static int io_err_report_zones(struct dm_target *ti,
+		struct dm_report_zones_args *args, unsigned int nr_zones)
+{
+	struct io_err_c *ioec = ti->private;
+
+	/*
+	 * This should never be called when we do not have a backing device
+	 * as that mean the target is not a zoned one.
+	 */
+	if (WARN_ON_ONCE(!ioec))
+		return -EIO;
+
+	return dm_report_zones(ioec->dev->bdev, ioec->start,
+			       io_err_map_sector(ti, args->next_sector),
+			       args, nr_zones);
+}
+#else
+#define io_err_report_zones NULL
+#endif
+
+static int io_err_iterate_devices(struct dm_target *ti,
+				  iterate_devices_callout_fn fn, void *data)
+{
+	struct io_err_c *ioec = ti->private;
+
+	if (!ioec)
+		return 0;
+
+	return fn(ti, ioec->dev, ioec->start, ti->len, data);
+}
+
 static void io_err_io_hints(struct dm_target *ti, struct queue_limits *limits)
 {
 	limits->max_discard_sectors = UINT_MAX;
@@ -165,15 +263,17 @@ static long io_err_dax_direct_access(struct dm_target *ti, pgoff_t pgoff,
 static struct target_type error_target = {
 	.name = "error",
-	.version = {1, 6, 0},
-	.features = DM_TARGET_WILDCARD,
+	.version = {1, 7, 0},
+	.features = DM_TARGET_WILDCARD | DM_TARGET_ZONED_HM,
 	.ctr = io_err_ctr,
 	.dtr = io_err_dtr,
 	.map = io_err_map,
 	.clone_and_map_rq = io_err_clone_and_map_rq,
 	.release_clone_rq = io_err_release_clone_rq,
+	.iterate_devices = io_err_iterate_devices,
 	.io_hints = io_err_io_hints,
 	.direct_access = io_err_dax_direct_access,
+	.report_zones = io_err_report_zones,
 };
 
 int __init dm_target_init(void)
......
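One small idiom worth noting from io_err_get_args() above: the error target now accepts the same <dev_path> <offset> arguments as dm-linear, and the offset is validated with a "%llu%c" sscanf format, where the trailing %c only matches if garbage follows the number. Below is a standalone, userspace-runnable C illustration of that check; parse_sector() is an invented name, not kernel code.

```c
#include <stdio.h>
#include <stdbool.h>

/* Accept a plain unsigned number with nothing after it, reject anything else. */
static bool parse_sector(const char *arg, unsigned long long *out)
{
	unsigned long long v;
	char dummy;

	/* Exactly one conversion means the %c never matched trailing junk. */
	if (sscanf(arg, "%llu%c", &v, &dummy) != 1)
		return false;

	*out = v;
	return true;
}

int main(void)
{
	unsigned long long s;

	printf("\"2048\" -> %d\n", parse_sector("2048", &s));	/* 1: accepted */
	printf("\"20x8\" -> %d\n", parse_sector("20x8", &s));	/* 0: rejected */
	return 0;
}
```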
@@ -570,13 +570,15 @@ static void dm_end_io_acct(struct dm_io *io)
 	dm_io_acct(io, true);
 }
 
-static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio)
+static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio, gfp_t gfp_mask)
 {
 	struct dm_io *io;
 	struct dm_target_io *tio;
 	struct bio *clone;
 
-	clone = bio_alloc_clone(NULL, bio, GFP_NOIO, &md->mempools->io_bs);
+	clone = bio_alloc_clone(NULL, bio, gfp_mask, &md->mempools->io_bs);
+	if (unlikely(!clone))
+		return NULL;
 	tio = clone_to_tio(clone);
 	tio->flags = 0;
 	dm_tio_set_flag(tio, DM_TIO_INSIDE_DM_IO);
@@ -1425,10 +1427,17 @@ static void __map_bio(struct bio *clone)
 		 */
 		if (unlikely(dm_emulate_zone_append(md)))
 			r = dm_zone_map_bio(tio);
+		else
+			goto do_map;
+	} else {
+do_map:
+		if (likely(ti->type->map == linear_map))
+			r = linear_map(ti, clone);
+		else if (ti->type->map == stripe_map)
+			r = stripe_map(ti, clone);
 		else
 			r = ti->type->map(ti, clone);
-	} else
-		r = ti->type->map(ti, clone);
+	}
 
 	switch (r) {
 	case DM_MAPIO_SUBMITTED:
@@ -1473,15 +1482,15 @@ static void setup_split_accounting(struct clone_info *ci, unsigned int len)
 static void alloc_multiple_bios(struct bio_list *blist, struct clone_info *ci,
 				struct dm_target *ti, unsigned int num_bios,
-				unsigned *len)
+				unsigned *len, gfp_t gfp_flag)
 {
 	struct bio *bio;
-	int try;
+	int try = (gfp_flag & GFP_NOWAIT) ? 0 : 1;
 
-	for (try = 0; try < 2; try++) {
+	for (; try < 2; try++) {
 		int bio_nr;
 
-		if (try)
+		if (try && num_bios > 1)
 			mutex_lock(&ci->io->md->table_devices_lock);
 		for (bio_nr = 0; bio_nr < num_bios; bio_nr++) {
 			bio = alloc_tio(ci, ti, bio_nr, len,
@@ -1491,7 +1500,7 @@ static void alloc_multiple_bios(struct bio_list *blist, struct clone_info *ci,
 			bio_list_add(blist, bio);
 		}
-		if (try)
+		if (try && num_bios > 1)
 			mutex_unlock(&ci->io->md->table_devices_lock);
 		if (bio_nr == num_bios)
 			return;
@@ -1501,34 +1510,31 @@ static void alloc_multiple_bios(struct bio_list *blist, struct clone_info *ci,
 	}
 }
 
-static int __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti,
-				 unsigned int num_bios, unsigned int *len)
+static unsigned int __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti,
+					  unsigned int num_bios, unsigned int *len,
+					  gfp_t gfp_flag)
 {
 	struct bio_list blist = BIO_EMPTY_LIST;
 	struct bio *clone;
 	unsigned int ret = 0;
 
-	switch (num_bios) {
-	case 0:
-		break;
-	case 1:
-		if (len)
-			setup_split_accounting(ci, *len);
-		clone = alloc_tio(ci, ti, 0, len, GFP_NOIO);
-		__map_bio(clone);
-		ret = 1;
-		break;
-	default:
-		if (len)
-			setup_split_accounting(ci, *len);
-		/* dm_accept_partial_bio() is not supported with shared tio->len_ptr */
-		alloc_multiple_bios(&blist, ci, ti, num_bios, len);
-		while ((clone = bio_list_pop(&blist))) {
-			dm_tio_set_flag(clone_to_tio(clone), DM_TIO_IS_DUPLICATE_BIO);
-			__map_bio(clone);
-			ret += 1;
-		}
-		break;
+	if (WARN_ON_ONCE(num_bios == 0)) /* num_bios = 0 is a bug in caller */
+		return 0;
+
+	/* dm_accept_partial_bio() is not supported with shared tio->len_ptr */
+	if (len)
+		setup_split_accounting(ci, *len);
+
+	/*
+	 * Using alloc_multiple_bios(), even if num_bios is 1, to consistently
+	 * support allocating using GFP_NOWAIT with GFP_NOIO fallback.
+	 */
+	alloc_multiple_bios(&blist, ci, ti, num_bios, len, gfp_flag);
+	while ((clone = bio_list_pop(&blist))) {
+		if (num_bios > 1)
+			dm_tio_set_flag(clone_to_tio(clone), DM_TIO_IS_DUPLICATE_BIO);
+		__map_bio(clone);
+		ret += 1;
 	}
 
 	return ret;
@@ -1555,8 +1561,12 @@ static void __send_empty_flush(struct clone_info *ci)
 		unsigned int bios;
 		struct dm_target *ti = dm_table_get_target(t, i);
 
+		if (unlikely(ti->num_flush_bios == 0))
+			continue;
+
 		atomic_add(ti->num_flush_bios, &ci->io->io_count);
-		bios = __send_duplicate_bios(ci, ti, ti->num_flush_bios, NULL);
+		bios = __send_duplicate_bios(ci, ti, ti->num_flush_bios,
+					     NULL, GFP_NOWAIT);
 		atomic_sub(ti->num_flush_bios - bios, &ci->io->io_count);
 	}
@@ -1569,10 +1579,9 @@ static void __send_empty_flush(struct clone_info *ci)
 	bio_uninit(ci->bio);
 }
 
-static void __send_changing_extent_only(struct clone_info *ci, struct dm_target *ti,
-					unsigned int num_bios,
-					unsigned int max_granularity,
-					unsigned int max_sectors)
+static void __send_abnormal_io(struct clone_info *ci, struct dm_target *ti,
+			       unsigned int num_bios, unsigned int max_granularity,
+			       unsigned int max_sectors)
 {
 	unsigned int len, bios;
@@ -1580,7 +1589,7 @@ static void __send_changing_extent_only(struct clone_info *ci, struct dm_target
 		  __max_io_len(ti, ci->sector, max_granularity, max_sectors));
 
 	atomic_add(num_bios, &ci->io->io_count);
-	bios = __send_duplicate_bios(ci, ti, num_bios, &len);
+	bios = __send_duplicate_bios(ci, ti, num_bios, &len, GFP_NOIO);
 	/*
 	 * alloc_io() takes one extra reference for submission, so the
 	 * reference won't reach 0 without the following (+1) subtraction
@@ -1649,8 +1658,8 @@ static blk_status_t __process_abnormal_io(struct clone_info *ci,
 	if (unlikely(!num_bios))
 		return BLK_STS_NOTSUPP;
 
-	__send_changing_extent_only(ci, ti, num_bios,
-				    max_granularity, max_sectors);
+	__send_abnormal_io(ci, ti, num_bios, max_granularity, max_sectors);
+
 	return BLK_STS_OK;
 }
@@ -1709,10 +1718,6 @@ static blk_status_t __split_and_process_bio(struct clone_info *ci)
 	if (unlikely(!ti))
 		return BLK_STS_IOERR;
 
-	if (unlikely((ci->bio->bi_opf & REQ_NOWAIT) != 0) &&
-	    unlikely(!dm_target_supports_nowait(ti->type)))
-		return BLK_STS_NOTSUPP;
-
 	if (unlikely(ci->is_abnormal_io))
 		return __process_abnormal_io(ci, ti);
@@ -1724,7 +1729,17 @@ static blk_status_t __split_and_process_bio(struct clone_info *ci)
 	len = min_t(sector_t, max_io_len(ti, ci->sector), ci->sector_count);
 	setup_split_accounting(ci, len);
-	clone = alloc_tio(ci, ti, 0, &len, GFP_NOIO);
+
+	if (unlikely(ci->bio->bi_opf & REQ_NOWAIT)) {
+		if (unlikely(!dm_target_supports_nowait(ti->type)))
+			return BLK_STS_NOTSUPP;
+
+		clone = alloc_tio(ci, ti, 0, &len, GFP_NOWAIT);
+		if (unlikely(!clone))
+			return BLK_STS_AGAIN;
+	} else {
+		clone = alloc_tio(ci, ti, 0, &len, GFP_NOIO);
+	}
 	__map_bio(clone);
 
 	ci->sector += len;
@@ -1733,11 +1748,11 @@ static blk_status_t __split_and_process_bio(struct clone_info *ci)
 	return BLK_STS_OK;
 }
 
-static void init_clone_info(struct clone_info *ci, struct mapped_device *md,
+static void init_clone_info(struct clone_info *ci, struct dm_io *io,
 			    struct dm_table *map, struct bio *bio, bool is_abnormal)
 {
 	ci->map = map;
-	ci->io = alloc_io(md, bio);
+	ci->io = io;
 	ci->bio = bio;
 	ci->is_abnormal_io = is_abnormal;
 	ci->submit_as_polled = false;
@@ -1772,8 +1787,18 @@ static void dm_split_and_process_bio(struct mapped_device *md,
 		return;
 	}
 
-	init_clone_info(&ci, md, map, bio, is_abnormal);
-	io = ci.io;
+	/* Only support nowait for normal IO */
+	if (unlikely(bio->bi_opf & REQ_NOWAIT) && !is_abnormal) {
+		io = alloc_io(md, bio, GFP_NOWAIT);
+		if (unlikely(!io)) {
+			/* Unable to do anything without dm_io. */
+			bio_wouldblock_error(bio);
+			return;
+		}
+	} else {
+		io = alloc_io(md, bio, GFP_NOIO);
+	}
+	init_clone_info(&ci, io, map, bio, is_abnormal);
 
 	if (bio->bi_opf & REQ_PREFLUSH) {
 		__send_empty_flush(&ci);
......
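The dm.c changes above implement the REQ_NOWAIT support mentioned in the pull message: every allocation on the bio submission path is attempted with GFP_NOWAIT when the bio asks not to block, and allocation failure is turned into BLK_STS_AGAIN or bio_wouldblock_error() instead of sleeping in the allocator. A hedged sketch of that pattern outside of DM follows; example_ctx and example_alloc_for_bio() are invented names, not part of the kernel.

```c
#include <linux/bio.h>
#include <linux/blk_types.h>
#include <linux/slab.h>

struct example_ctx {
	int dummy;
};

static blk_status_t example_alloc_for_bio(struct bio *bio,
					  struct example_ctx **out)
{
	/* Never sleep in the allocator if the submitter asked for nowait. */
	gfp_t gfp = (bio->bi_opf & REQ_NOWAIT) ? GFP_NOWAIT : GFP_NOIO;
	struct example_ctx *ctx;

	ctx = kmalloc(sizeof(*ctx), gfp);
	if (!ctx) {
		/* With REQ_NOWAIT the caller is expected to retry later. */
		if (bio->bi_opf & REQ_NOWAIT)
			return BLK_STS_AGAIN;
		return BLK_STS_RESOURCE;
	}

	*out = ctx;
	return BLK_STS_OK;
}
```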
@@ -188,9 +188,11 @@ void dm_kobject_release(struct kobject *kobj);
 /*
  * Targets for linear and striped mappings
  */
+int linear_map(struct dm_target *ti, struct bio *bio);
 int dm_linear_init(void);
 void dm_linear_exit(void);
 
+int stripe_map(struct dm_target *ti, struct bio *bio);
 int dm_stripe_init(void);
 void dm_stripe_exit(void);
......