Commit 0364249d authored by Linus Torvalds

Merge tag 'for-6.7/dm-changes' of...

Merge tag 'for-6.7/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm

Pull device mapper updates from Mike Snitzer:

 - Update DM core to directly call the map function for both the linear
   and stripe targets, which are provided by DM core

 - Various updates to use new safer string functions

 - Update DM core to respect REQ_NOWAIT flag in normal bios so that
   memory allocations are always attempted with GFP_NOWAIT

 - Add Mikulas Patocka to MAINTAINERS as a DM maintainer!

 - Improve DM delay target's handling of short delays (< 50ms) by using
   a kthread to check expiration of IOs rather than timers and a wq

 - Update the DM error target so that it works with zoned storage. This
   helps xfstests to provide proper IO error handling coverage when
   testing a filesystem with native zoned storage support

 - Update both DM crypt and integrity targets to improve performance by
   using crypto_shash_digest() rather than init+update+final sequence

 - Fix DM crypt target by backfilling missing memory allocation
   accounting for compound pages

* tag 'for-6.7/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm:
  dm crypt: account large pages in cc->n_allocated_pages
  dm integrity: use crypto_shash_digest() in sb_mac()
  dm crypt: use crypto_shash_digest() in crypt_iv_tcw_whitening()
  dm error: Add support for zoned block devices
  dm delay: for short delays, use kthread instead of timers and wq
  MAINTAINERS: add Mikulas Patocka as a DM maintainer
  dm: respect REQ_NOWAIT flag in normal bios issued to DM
  dm: enhance alloc_multiple_bios() to be more versatile
  dm: make __send_duplicate_bios return unsigned int
  dm log userspace: replace deprecated strncpy with strscpy
  dm ioctl: replace deprecated strncpy with strscpy_pad
  dm crypt: replace open-coded kmemdup_nul
  dm cache metadata: replace deprecated strncpy with strscpy
  dm: shortcut the calls to linear_map and stripe_map
parents 39714efc 9793c269
......@@ -6028,6 +6028,7 @@ F: include/linux/devm-helpers.h
DEVICE-MAPPER (LVM)
M: Alasdair Kergon <agk@redhat.com>
M: Mike Snitzer <snitzer@kernel.org>
M: Mikulas Patocka <mpatocka@redhat.com>
M: dm-devel@lists.linux.dev
L: dm-devel@lists.linux.dev
S: Maintained
......
......@@ -597,7 +597,7 @@ static void read_superblock_fields(struct dm_cache_metadata *cmd,
cmd->discard_nr_blocks = to_dblock(le64_to_cpu(disk_super->discard_nr_blocks));
cmd->data_block_size = le32_to_cpu(disk_super->data_block_size);
cmd->cache_blocks = to_cblock(le32_to_cpu(disk_super->cache_blocks));
strncpy(cmd->policy_name, disk_super->policy_name, sizeof(cmd->policy_name));
strscpy(cmd->policy_name, disk_super->policy_name, sizeof(cmd->policy_name));
cmd->policy_version[0] = le32_to_cpu(disk_super->policy_version[0]);
cmd->policy_version[1] = le32_to_cpu(disk_super->policy_version[1]);
cmd->policy_version[2] = le32_to_cpu(disk_super->policy_version[2]);
......@@ -707,7 +707,7 @@ static int __commit_transaction(struct dm_cache_metadata *cmd,
disk_super->discard_block_size = cpu_to_le64(cmd->discard_block_size);
disk_super->discard_nr_blocks = cpu_to_le64(from_dblock(cmd->discard_nr_blocks));
disk_super->cache_blocks = cpu_to_le32(from_cblock(cmd->cache_blocks));
strncpy(disk_super->policy_name, cmd->policy_name, sizeof(disk_super->policy_name));
strscpy(disk_super->policy_name, cmd->policy_name, sizeof(disk_super->policy_name));
disk_super->policy_version[0] = cpu_to_le32(cmd->policy_version[0]);
disk_super->policy_version[1] = cpu_to_le32(cmd->policy_version[1]);
disk_super->policy_version[2] = cpu_to_le32(cmd->policy_version[2]);
......@@ -1726,7 +1726,7 @@ static int write_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *po
(strlen(policy_name) > sizeof(cmd->policy_name) - 1))
return -EINVAL;
strncpy(cmd->policy_name, policy_name, sizeof(cmd->policy_name));
strscpy(cmd->policy_name, policy_name, sizeof(cmd->policy_name));
memcpy(cmd->policy_version, policy_version, sizeof(cmd->policy_version));
hint_size = dm_cache_policy_get_hint_size(policy);
......
......@@ -652,13 +652,7 @@ static int crypt_iv_tcw_whitening(struct crypt_config *cc,
/* calculate crc32 for every 32bit part and xor it */
desc->tfm = tcw->crc32_tfm;
for (i = 0; i < 4; i++) {
r = crypto_shash_init(desc);
if (r)
goto out;
r = crypto_shash_update(desc, &buf[i * 4], 4);
if (r)
goto out;
r = crypto_shash_final(desc, &buf[i * 4]);
r = crypto_shash_digest(desc, &buf[i * 4], 4, &buf[i * 4]);
if (r)
goto out;
}
......@@ -1699,11 +1693,17 @@ static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned int size)
order = min(order, remaining_order);
while (order > 0) {
if (unlikely(percpu_counter_read_positive(&cc->n_allocated_pages) +
(1 << order) > dm_crypt_pages_per_client))
goto decrease_order;
pages = alloc_pages(gfp_mask
| __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN | __GFP_COMP,
order);
if (likely(pages != NULL))
if (likely(pages != NULL)) {
percpu_counter_add(&cc->n_allocated_pages, 1 << order);
goto have_pages;
}
decrease_order:
order--;
}
......@@ -1741,10 +1741,13 @@ static void crypt_free_buffer_pages(struct crypt_config *cc, struct bio *clone)
if (clone->bi_vcnt > 0) { /* bio_for_each_folio_all crashes with an empty bio */
bio_for_each_folio_all(fi, clone) {
if (folio_test_large(fi.folio))
if (folio_test_large(fi.folio)) {
percpu_counter_sub(&cc->n_allocated_pages,
1 << folio_order(fi.folio));
folio_put(fi.folio);
else
} else {
mempool_free(&fi.folio->page, &cc->page_pool);
}
}
}
}
......@@ -2859,10 +2862,9 @@ static int crypt_ctr_auth_cipher(struct crypt_config *cc, char *cipher_api)
if (!start || !end || ++start > end)
return -EINVAL;
mac_alg = kzalloc(end - start + 1, GFP_KERNEL);
mac_alg = kmemdup_nul(start, end - start, GFP_KERNEL);
if (!mac_alg)
return -ENOMEM;
strncpy(mac_alg, start, end - start);
mac = crypto_alloc_ahash(mac_alg, 0, CRYPTO_ALG_ALLOCATES_MEMORY);
kfree(mac_alg);
......
......@@ -13,6 +13,7 @@
#include <linux/blkdev.h>
#include <linux/bio.h>
#include <linux/slab.h>
#include <linux/kthread.h>
#include <linux/device-mapper.h>
......@@ -31,6 +32,7 @@ struct delay_c {
struct workqueue_struct *kdelayd_wq;
struct work_struct flush_expired_bios;
struct list_head delayed_bios;
struct task_struct *worker;
atomic_t may_delay;
struct delay_class read;
......@@ -66,6 +68,44 @@ static void queue_timeout(struct delay_c *dc, unsigned long expires)
mutex_unlock(&dc->timer_lock);
}
static inline bool delay_is_fast(struct delay_c *dc)
{
return !!dc->worker;
}
/*
 * Submit delayed bios queued on dc->delayed_bios (kthread / "fast" path).
 *
 * @dc:        delay target context whose delayed_bios list is walked
 * @flush_all: when true, submit every queued bio regardless of its expiry;
 *             when false, submit only bios whose ->expires has been reached
 *
 * The whole walk runs with delayed_bios_lock held.
 */
static void flush_delayed_bios_fast(struct delay_c *dc, bool flush_all)
{
	struct dm_delay_info *delayed, *next;

	mutex_lock(&delayed_bios_lock);
	list_for_each_entry_safe(delayed, next, &dc->delayed_bios, list) {
		if (flush_all || time_after_eq(jiffies, delayed->expires)) {
			struct bio *bio = dm_bio_from_per_bio_data(delayed,
						sizeof(struct dm_delay_info));

			list_del(&delayed->list);
			/*
			 * Update the per-class accounting before handing the
			 * bio off: 'delayed' is the bio's per-bio data, so it
			 * must not be dereferenced once the bio has been
			 * submitted and may already have completed.
			 */
			delayed->class->ops--;
			dm_submit_bio_remap(bio, NULL);
		}
	}
	mutex_unlock(&delayed_bios_lock);
}
static int flush_worker_fn(void *data)
{
struct delay_c *dc = data;
while (1) {
flush_delayed_bios_fast(dc, false);
if (unlikely(list_empty(&dc->delayed_bios))) {
set_current_state(TASK_INTERRUPTIBLE);
schedule();
} else
cond_resched();
}
return 0;
}
static void flush_bios(struct bio *bio)
{
struct bio *n;
......@@ -78,7 +118,7 @@ static void flush_bios(struct bio *bio)
}
}
static struct bio *flush_delayed_bios(struct delay_c *dc, int flush_all)
static struct bio *flush_delayed_bios(struct delay_c *dc, bool flush_all)
{
struct dm_delay_info *delayed, *next;
unsigned long next_expires = 0;
......@@ -115,7 +155,10 @@ static void flush_expired_bios(struct work_struct *work)
struct delay_c *dc;
dc = container_of(work, struct delay_c, flush_expired_bios);
flush_bios(flush_delayed_bios(dc, 0));
if (delay_is_fast(dc))
flush_delayed_bios_fast(dc, false);
else
flush_bios(flush_delayed_bios(dc, false));
}
static void delay_dtr(struct dm_target *ti)
......@@ -131,8 +174,11 @@ static void delay_dtr(struct dm_target *ti)
dm_put_device(ti, dc->write.dev);
if (dc->flush.dev)
dm_put_device(ti, dc->flush.dev);
if (dc->worker)
kthread_stop(dc->worker);
mutex_destroy(&dc->timer_lock);
if (!delay_is_fast(dc))
mutex_destroy(&dc->timer_lock);
kfree(dc);
}
......@@ -175,6 +221,7 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{
struct delay_c *dc;
int ret;
unsigned int max_delay;
if (argc != 3 && argc != 6 && argc != 9) {
ti->error = "Requires exactly 3, 6 or 9 arguments";
......@@ -188,16 +235,14 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)
}
ti->private = dc;
timer_setup(&dc->delay_timer, handle_delayed_timer, 0);
INIT_WORK(&dc->flush_expired_bios, flush_expired_bios);
INIT_LIST_HEAD(&dc->delayed_bios);
mutex_init(&dc->timer_lock);
atomic_set(&dc->may_delay, 1);
dc->argc = argc;
ret = delay_class_ctr(ti, &dc->read, argv);
if (ret)
goto bad;
max_delay = dc->read.delay;
if (argc == 3) {
ret = delay_class_ctr(ti, &dc->write, argv);
......@@ -206,6 +251,8 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)
ret = delay_class_ctr(ti, &dc->flush, argv);
if (ret)
goto bad;
max_delay = max(max_delay, dc->write.delay);
max_delay = max(max_delay, dc->flush.delay);
goto out;
}
......@@ -216,19 +263,37 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)
ret = delay_class_ctr(ti, &dc->flush, argv + 3);
if (ret)
goto bad;
max_delay = max(max_delay, dc->flush.delay);
goto out;
}
ret = delay_class_ctr(ti, &dc->flush, argv + 6);
if (ret)
goto bad;
max_delay = max(max_delay, dc->flush.delay);
out:
dc->kdelayd_wq = alloc_workqueue("kdelayd", WQ_MEM_RECLAIM, 0);
if (!dc->kdelayd_wq) {
ret = -EINVAL;
DMERR("Couldn't start kdelayd");
goto bad;
if (max_delay < 50) {
/*
* In case of small requested delays, use kthread instead of
* timers and workqueue to achieve better latency.
*/
dc->worker = kthread_create(&flush_worker_fn, dc,
"dm-delay-flush-worker");
if (IS_ERR(dc->worker)) {
ret = PTR_ERR(dc->worker);
goto bad;
}
} else {
timer_setup(&dc->delay_timer, handle_delayed_timer, 0);
INIT_WORK(&dc->flush_expired_bios, flush_expired_bios);
mutex_init(&dc->timer_lock);
dc->kdelayd_wq = alloc_workqueue("kdelayd", WQ_MEM_RECLAIM, 0);
if (!dc->kdelayd_wq) {
ret = -EINVAL;
DMERR("Couldn't start kdelayd");
goto bad;
}
}
ti->num_flush_bios = 1;
......@@ -260,7 +325,10 @@ static int delay_bio(struct delay_c *dc, struct delay_class *c, struct bio *bio)
list_add_tail(&delayed->list, &dc->delayed_bios);
mutex_unlock(&delayed_bios_lock);
queue_timeout(dc, expires);
if (delay_is_fast(dc))
wake_up_process(dc->worker);
else
queue_timeout(dc, expires);
return DM_MAPIO_SUBMITTED;
}
......@@ -270,8 +338,13 @@ static void delay_presuspend(struct dm_target *ti)
struct delay_c *dc = ti->private;
atomic_set(&dc->may_delay, 0);
del_timer_sync(&dc->delay_timer);
flush_bios(flush_delayed_bios(dc, 1));
if (delay_is_fast(dc))
flush_delayed_bios_fast(dc, true);
else {
del_timer_sync(&dc->delay_timer);
flush_bios(flush_delayed_bios(dc, true));
}
}
static void delay_resume(struct dm_target *ti)
......@@ -356,7 +429,7 @@ static int delay_iterate_devices(struct dm_target *ti,
static struct target_type delay_target = {
.name = "delay",
.version = {1, 3, 0},
.version = {1, 4, 0},
.features = DM_TARGET_PASSES_INTEGRITY,
.module = THIS_MODULE,
.ctr = delay_ctr,
......
......@@ -493,42 +493,32 @@ static int sb_mac(struct dm_integrity_c *ic, bool wr)
{
SHASH_DESC_ON_STACK(desc, ic->journal_mac);
int r;
unsigned int size = crypto_shash_digestsize(ic->journal_mac);
unsigned int mac_size = crypto_shash_digestsize(ic->journal_mac);
__u8 *sb = (__u8 *)ic->sb;
__u8 *mac = sb + (1 << SECTOR_SHIFT) - mac_size;
if (sizeof(struct superblock) + size > 1 << SECTOR_SHIFT) {
if (sizeof(struct superblock) + mac_size > 1 << SECTOR_SHIFT) {
dm_integrity_io_error(ic, "digest is too long", -EINVAL);
return -EINVAL;
}
desc->tfm = ic->journal_mac;
r = crypto_shash_init(desc);
if (unlikely(r < 0)) {
dm_integrity_io_error(ic, "crypto_shash_init", r);
return r;
}
r = crypto_shash_update(desc, (__u8 *)ic->sb, (1 << SECTOR_SHIFT) - size);
if (unlikely(r < 0)) {
dm_integrity_io_error(ic, "crypto_shash_update", r);
return r;
}
if (likely(wr)) {
r = crypto_shash_final(desc, (__u8 *)ic->sb + (1 << SECTOR_SHIFT) - size);
r = crypto_shash_digest(desc, sb, mac - sb, mac);
if (unlikely(r < 0)) {
dm_integrity_io_error(ic, "crypto_shash_final", r);
dm_integrity_io_error(ic, "crypto_shash_digest", r);
return r;
}
} else {
__u8 result[HASH_MAX_DIGESTSIZE];
__u8 actual_mac[HASH_MAX_DIGESTSIZE];
r = crypto_shash_final(desc, result);
r = crypto_shash_digest(desc, sb, mac - sb, actual_mac);
if (unlikely(r < 0)) {
dm_integrity_io_error(ic, "crypto_shash_final", r);
dm_integrity_io_error(ic, "crypto_shash_digest", r);
return r;
}
if (memcmp((__u8 *)ic->sb + (1 << SECTOR_SHIFT) - size, result, size)) {
if (memcmp(mac, actual_mac, mac_size)) {
dm_integrity_io_error(ic, "superblock mac", -EILSEQ);
dm_audit_log_target(DM_MSG_PREFIX, "mac-superblock", ic->ti, 0);
return -EILSEQ;
......
......@@ -1295,8 +1295,8 @@ static void retrieve_status(struct dm_table *table,
spec->status = 0;
spec->sector_start = ti->begin;
spec->length = ti->len;
strncpy(spec->target_type, ti->type->name,
sizeof(spec->target_type) - 1);
strscpy_pad(spec->target_type, ti->type->name,
sizeof(spec->target_type));
outptr += sizeof(struct dm_target_spec);
remaining = len - (outptr - outbuf);
......
......@@ -85,7 +85,7 @@ static sector_t linear_map_sector(struct dm_target *ti, sector_t bi_sector)
return lc->start + dm_target_offset(ti, bi_sector);
}
static int linear_map(struct dm_target *ti, struct bio *bio)
int linear_map(struct dm_target *ti, struct bio *bio)
{
struct linear_c *lc = ti->private;
......
......@@ -224,7 +224,7 @@ static int userspace_ctr(struct dm_dirty_log *log, struct dm_target *ti,
lc->usr_argc = argc;
strncpy(lc->uuid, argv[0], DM_UUID_LEN);
strscpy(lc->uuid, argv[0], sizeof(lc->uuid));
argc--;
argv++;
spin_lock_init(&lc->flush_lock);
......
......@@ -268,7 +268,7 @@ static int stripe_map_range(struct stripe_c *sc, struct bio *bio,
return DM_MAPIO_SUBMITTED;
}
static int stripe_map(struct dm_target *ti, struct bio *bio)
int stripe_map(struct dm_target *ti, struct bio *bio)
{
struct stripe_c *sc = ti->private;
uint32_t stripe;
......
......@@ -844,7 +844,8 @@ static bool dm_table_supports_dax(struct dm_table *t,
if (!ti->type->direct_access)
return false;
if (!ti->type->iterate_devices ||
if (dm_target_is_wildcard(ti->type) ||
!ti->type->iterate_devices ||
ti->type->iterate_devices(ti, iterate_fn, NULL))
return false;
}
......@@ -1587,6 +1588,14 @@ static int device_not_zoned_model(struct dm_target *ti, struct dm_dev *dev,
return blk_queue_zoned_model(q) != *zoned_model;
}
/*
 * iterate_devices callout: returns non-zero when the queue of @dev reports
 * a zoned model other than BLK_ZONED_NONE.  @ti, @start, @len and @data are
 * not used.
 */
static int device_is_zoned_model(struct dm_target *ti, struct dm_dev *dev,
				 sector_t start, sector_t len, void *data)
{
	return blk_queue_zoned_model(bdev_get_queue(dev->bdev)) != BLK_ZONED_NONE;
}
/*
* Check the device zoned model based on the target feature flag. If the target
* has the DM_TARGET_ZONED_HM feature flag set, host-managed zoned devices are
......@@ -1600,6 +1609,18 @@ static bool dm_table_supports_zoned_model(struct dm_table *t,
for (unsigned int i = 0; i < t->num_targets; i++) {
struct dm_target *ti = dm_table_get_target(t, i);
/*
* For the wildcard target (dm-error), if we do not have a
* backing device, we must always return false. If we have a
* backing device, the result must depend on checking zoned
* model, like for any other target. So for this, check directly
* if the target backing device is zoned as we get "false" when
* dm-error was set without a backing device.
*/
if (dm_target_is_wildcard(ti->type) &&
!ti->type->iterate_devices(ti, device_is_zoned_model, NULL))
return false;
if (dm_target_supports_zoned_hm(ti->type)) {
if (!ti->type->iterate_devices ||
ti->type->iterate_devices(ti, device_not_zoned_model,
......
......@@ -116,8 +116,62 @@ EXPORT_SYMBOL(dm_unregister_target);
* io-err: always fails an io, useful for bringing
* up LVs that have holes in them.
*/
/*
 * Per-target context of the error target when it was given a backing device.
 * Allocated and filled by io_err_get_args() and stored in the target's
 * ->private pointer.
 */
struct io_err_c {
/* Backing device obtained through dm_get_device() */
struct dm_dev *dev;
/* Start sector parsed from the second constructor argument */
sector_t start;
};
static int io_err_get_args(struct dm_target *tt, unsigned int argc, char **args)
{
unsigned long long start;
struct io_err_c *ioec;
char dummy;
int ret;
ioec = kmalloc(sizeof(*ioec), GFP_KERNEL);
if (!ioec) {
tt->error = "Cannot allocate io_err context";
return -ENOMEM;
}
ret = -EINVAL;
if (sscanf(args[1], "%llu%c", &start, &dummy) != 1 ||
start != (sector_t)start) {
tt->error = "Invalid device sector";
goto bad;
}
ioec->start = start;
ret = dm_get_device(tt, args[0], dm_table_get_mode(tt->table), &ioec->dev);
if (ret) {
tt->error = "Device lookup failed";
goto bad;
}
tt->private = ioec;
return 0;
bad:
kfree(ioec);
return ret;
}
static int io_err_ctr(struct dm_target *tt, unsigned int argc, char **args)
{
/*
* If we have arguments, assume it is the path to the backing
* block device and its mapping start sector (same as dm-linear).
* In this case, get the device so that we can get its limits.
*/
if (argc == 2) {
int ret = io_err_get_args(tt, argc, args);
if (ret)
return ret;
}
/*
* Return error for discards instead of -EOPNOTSUPP
*/
......@@ -129,7 +183,12 @@ static int io_err_ctr(struct dm_target *tt, unsigned int argc, char **args)
/*
 * Destructor of the error target: if a context was attached to
 * @tt->private, drop its device reference and free it.
 */
static void io_err_dtr(struct dm_target *tt)
{
	struct io_err_c *ctx = tt->private;

	if (!ctx)
		return;

	dm_put_device(tt, ctx->dev);
	kfree(ctx);
}
static int io_err_map(struct dm_target *tt, struct bio *bio)
......@@ -149,6 +208,45 @@ static void io_err_release_clone_rq(struct request *clone,
{
}
#ifdef CONFIG_BLK_DEV_ZONED
/*
 * Translate a sector of the target into a sector of the backing device:
 * the target-relative offset of @bi_sector plus the context's start sector.
 */
static sector_t io_err_map_sector(struct dm_target *ti, sector_t bi_sector)
{
	struct io_err_c *ctx = ti->private;
	sector_t offset = dm_target_offset(ti, bi_sector);

	return ctx->start + offset;
}
static int io_err_report_zones(struct dm_target *ti,
struct dm_report_zones_args *args, unsigned int nr_zones)
{
struct io_err_c *ioec = ti->private;
/*
* This should never be called when we do not have a backing device
* as that mean the target is not a zoned one.
*/
if (WARN_ON_ONCE(!ioec))
return -EIO;
return dm_report_zones(ioec->dev->bdev, ioec->start,
io_err_map_sector(ti, args->next_sector),
args, nr_zones);
}
#else
#define io_err_report_zones NULL
#endif
/*
 * iterate_devices handler of the error target.
 *
 * Without a context there is no device to visit and 0 is returned.
 * Otherwise @fn is invoked once for the backing device, covering the
 * whole target length from the context's start sector, and its result is
 * returned.
 */
static int io_err_iterate_devices(struct dm_target *ti,
				  iterate_devices_callout_fn fn, void *data)
{
	struct io_err_c *ctx = ti->private;

	return ctx ? fn(ti, ctx->dev, ctx->start, ti->len, data) : 0;
}
static void io_err_io_hints(struct dm_target *ti, struct queue_limits *limits)
{
limits->max_discard_sectors = UINT_MAX;
......@@ -165,15 +263,17 @@ static long io_err_dax_direct_access(struct dm_target *ti, pgoff_t pgoff,
static struct target_type error_target = {
.name = "error",
.version = {1, 6, 0},
.features = DM_TARGET_WILDCARD,
.version = {1, 7, 0},
.features = DM_TARGET_WILDCARD | DM_TARGET_ZONED_HM,
.ctr = io_err_ctr,
.dtr = io_err_dtr,
.map = io_err_map,
.clone_and_map_rq = io_err_clone_and_map_rq,
.release_clone_rq = io_err_release_clone_rq,
.iterate_devices = io_err_iterate_devices,
.io_hints = io_err_io_hints,
.direct_access = io_err_dax_direct_access,
.report_zones = io_err_report_zones,
};
int __init dm_target_init(void)
......
......@@ -570,13 +570,15 @@ static void dm_end_io_acct(struct dm_io *io)
dm_io_acct(io, true);
}
static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio)
static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio, gfp_t gfp_mask)
{
struct dm_io *io;
struct dm_target_io *tio;
struct bio *clone;
clone = bio_alloc_clone(NULL, bio, GFP_NOIO, &md->mempools->io_bs);
clone = bio_alloc_clone(NULL, bio, gfp_mask, &md->mempools->io_bs);
if (unlikely(!clone))
return NULL;
tio = clone_to_tio(clone);
tio->flags = 0;
dm_tio_set_flag(tio, DM_TIO_INSIDE_DM_IO);
......@@ -1425,10 +1427,17 @@ static void __map_bio(struct bio *clone)
*/
if (unlikely(dm_emulate_zone_append(md)))
r = dm_zone_map_bio(tio);
else
goto do_map;
} else {
do_map:
if (likely(ti->type->map == linear_map))
r = linear_map(ti, clone);
else if (ti->type->map == stripe_map)
r = stripe_map(ti, clone);
else
r = ti->type->map(ti, clone);
} else
r = ti->type->map(ti, clone);
}
switch (r) {
case DM_MAPIO_SUBMITTED:
......@@ -1473,15 +1482,15 @@ static void setup_split_accounting(struct clone_info *ci, unsigned int len)
static void alloc_multiple_bios(struct bio_list *blist, struct clone_info *ci,
struct dm_target *ti, unsigned int num_bios,
unsigned *len)
unsigned *len, gfp_t gfp_flag)
{
struct bio *bio;
int try;
int try = (gfp_flag & GFP_NOWAIT) ? 0 : 1;
for (try = 0; try < 2; try++) {
for (; try < 2; try++) {
int bio_nr;
if (try)
if (try && num_bios > 1)
mutex_lock(&ci->io->md->table_devices_lock);
for (bio_nr = 0; bio_nr < num_bios; bio_nr++) {
bio = alloc_tio(ci, ti, bio_nr, len,
......@@ -1491,7 +1500,7 @@ static void alloc_multiple_bios(struct bio_list *blist, struct clone_info *ci,
bio_list_add(blist, bio);
}
if (try)
if (try && num_bios > 1)
mutex_unlock(&ci->io->md->table_devices_lock);
if (bio_nr == num_bios)
return;
......@@ -1501,34 +1510,31 @@ static void alloc_multiple_bios(struct bio_list *blist, struct clone_info *ci,
}
}
static int __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti,
unsigned int num_bios, unsigned int *len)
static unsigned int __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti,
unsigned int num_bios, unsigned int *len,
gfp_t gfp_flag)
{
struct bio_list blist = BIO_EMPTY_LIST;
struct bio *clone;
unsigned int ret = 0;
switch (num_bios) {
case 0:
break;
case 1:
if (len)
setup_split_accounting(ci, *len);
clone = alloc_tio(ci, ti, 0, len, GFP_NOIO);
__map_bio(clone);
ret = 1;
break;
default:
if (len)
setup_split_accounting(ci, *len);
/* dm_accept_partial_bio() is not supported with shared tio->len_ptr */
alloc_multiple_bios(&blist, ci, ti, num_bios, len);
while ((clone = bio_list_pop(&blist))) {
if (WARN_ON_ONCE(num_bios == 0)) /* num_bios = 0 is a bug in caller */
return 0;
/* dm_accept_partial_bio() is not supported with shared tio->len_ptr */
if (len)
setup_split_accounting(ci, *len);
/*
* Using alloc_multiple_bios(), even if num_bios is 1, to consistently
* support allocating using GFP_NOWAIT with GFP_NOIO fallback.
*/
alloc_multiple_bios(&blist, ci, ti, num_bios, len, gfp_flag);
while ((clone = bio_list_pop(&blist))) {
if (num_bios > 1)
dm_tio_set_flag(clone_to_tio(clone), DM_TIO_IS_DUPLICATE_BIO);
__map_bio(clone);
ret += 1;
}
break;
__map_bio(clone);
ret += 1;
}
return ret;
......@@ -1555,8 +1561,12 @@ static void __send_empty_flush(struct clone_info *ci)
unsigned int bios;
struct dm_target *ti = dm_table_get_target(t, i);
if (unlikely(ti->num_flush_bios == 0))
continue;
atomic_add(ti->num_flush_bios, &ci->io->io_count);
bios = __send_duplicate_bios(ci, ti, ti->num_flush_bios, NULL);
bios = __send_duplicate_bios(ci, ti, ti->num_flush_bios,
NULL, GFP_NOWAIT);
atomic_sub(ti->num_flush_bios - bios, &ci->io->io_count);
}
......@@ -1569,10 +1579,9 @@ static void __send_empty_flush(struct clone_info *ci)
bio_uninit(ci->bio);
}
static void __send_changing_extent_only(struct clone_info *ci, struct dm_target *ti,
unsigned int num_bios,
unsigned int max_granularity,
unsigned int max_sectors)
static void __send_abnormal_io(struct clone_info *ci, struct dm_target *ti,
unsigned int num_bios, unsigned int max_granularity,
unsigned int max_sectors)
{
unsigned int len, bios;
......@@ -1580,7 +1589,7 @@ static void __send_changing_extent_only(struct clone_info *ci, struct dm_target
__max_io_len(ti, ci->sector, max_granularity, max_sectors));
atomic_add(num_bios, &ci->io->io_count);
bios = __send_duplicate_bios(ci, ti, num_bios, &len);
bios = __send_duplicate_bios(ci, ti, num_bios, &len, GFP_NOIO);
/*
* alloc_io() takes one extra reference for submission, so the
* reference won't reach 0 without the following (+1) subtraction
......@@ -1649,8 +1658,8 @@ static blk_status_t __process_abnormal_io(struct clone_info *ci,
if (unlikely(!num_bios))
return BLK_STS_NOTSUPP;
__send_changing_extent_only(ci, ti, num_bios,
max_granularity, max_sectors);
__send_abnormal_io(ci, ti, num_bios, max_granularity, max_sectors);
return BLK_STS_OK;
}
......@@ -1709,10 +1718,6 @@ static blk_status_t __split_and_process_bio(struct clone_info *ci)
if (unlikely(!ti))
return BLK_STS_IOERR;
if (unlikely((ci->bio->bi_opf & REQ_NOWAIT) != 0) &&
unlikely(!dm_target_supports_nowait(ti->type)))
return BLK_STS_NOTSUPP;
if (unlikely(ci->is_abnormal_io))
return __process_abnormal_io(ci, ti);
......@@ -1724,7 +1729,17 @@ static blk_status_t __split_and_process_bio(struct clone_info *ci)
len = min_t(sector_t, max_io_len(ti, ci->sector), ci->sector_count);
setup_split_accounting(ci, len);
clone = alloc_tio(ci, ti, 0, &len, GFP_NOIO);
if (unlikely(ci->bio->bi_opf & REQ_NOWAIT)) {
if (unlikely(!dm_target_supports_nowait(ti->type)))
return BLK_STS_NOTSUPP;
clone = alloc_tio(ci, ti, 0, &len, GFP_NOWAIT);
if (unlikely(!clone))
return BLK_STS_AGAIN;
} else {
clone = alloc_tio(ci, ti, 0, &len, GFP_NOIO);
}
__map_bio(clone);
ci->sector += len;
......@@ -1733,11 +1748,11 @@ static blk_status_t __split_and_process_bio(struct clone_info *ci)
return BLK_STS_OK;
}
static void init_clone_info(struct clone_info *ci, struct mapped_device *md,
static void init_clone_info(struct clone_info *ci, struct dm_io *io,
struct dm_table *map, struct bio *bio, bool is_abnormal)
{
ci->map = map;
ci->io = alloc_io(md, bio);
ci->io = io;
ci->bio = bio;
ci->is_abnormal_io = is_abnormal;
ci->submit_as_polled = false;
......@@ -1772,8 +1787,18 @@ static void dm_split_and_process_bio(struct mapped_device *md,
return;
}
init_clone_info(&ci, md, map, bio, is_abnormal);
io = ci.io;
/* Only support nowait for normal IO */
if (unlikely(bio->bi_opf & REQ_NOWAIT) && !is_abnormal) {
io = alloc_io(md, bio, GFP_NOWAIT);
if (unlikely(!io)) {
/* Unable to do anything without dm_io. */
bio_wouldblock_error(bio);
return;
}
} else {
io = alloc_io(md, bio, GFP_NOIO);
}
init_clone_info(&ci, io, map, bio, is_abnormal);
if (bio->bi_opf & REQ_PREFLUSH) {
__send_empty_flush(&ci);
......
......@@ -188,9 +188,11 @@ void dm_kobject_release(struct kobject *kobj);
/*
* Targets for linear and striped mappings
*/
int linear_map(struct dm_target *ti, struct bio *bio);
int dm_linear_init(void);
void dm_linear_exit(void);
int stripe_map(struct dm_target *ti, struct bio *bio);
int dm_stripe_init(void);
void dm_stripe_exit(void);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment