Commit 776e49e8 authored by Linus Torvalds

Merge tag 'for-5.6/dm-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm

Pull device mapper fixes from Mike Snitzer:

 - Fix request-based DM's congestion_fn and actually wire it up to the
   bdi.

 - Extend dm-bio-record to track additional struct bio members needed by
   the DM integrity target (a usage sketch of the new helpers follows the
   change list below).

 - Fix DM core to properly advertise that a device is suspended during
   unload (between the presuspend and postsuspend hooks). This change is
   a prereq for related DM integrity and DM writecache fixes. It
   elevates DM integrity's 'suspending' state tracking to DM core.

 - Four stable fixes for the DM integrity target.

 - Fix a crash in the DM cache target due to incorrect work item
   cancelling.

 - Fix a DM thin metadata lockdep warning that was introduced during the
   5.6 merge window.

 - Fix the DM zoned target's chunk work refcounting that regressed during
   the recent conversion to refcount_t (see the refcount sketch right
   after this list).

 - Bump the minor version for DM core and all target versions that have
   seen interface changes or important fixes during the 5.6 cycle.
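
A note on the refcount_t idiom behind the DM zoned fix: refcount_t
deliberately WARNs when a counter is incremented from zero, because a zero
count means the object may already be freed. The chunk works were created
with their refcount at 0 and only referenced afterwards, which trips that
check; the fix below creates them with the count already at 1, so the
creator owns the first reference. A minimal sketch of the pattern, with
hypothetical names (this is not the dm-zoned code itself):

  #include <linux/refcount.h>
  #include <linux/slab.h>

  struct chunk_work {
          refcount_t refcount;
          /* ... payload ... */
  };

  static struct chunk_work *chunk_work_create(void)
  {
          struct chunk_work *cw = kmalloc(sizeof(*cw), GFP_NOIO);

          if (!cw)
                  return NULL;
          /*
           * Start at 1: the creator holds the first reference.
           * Starting at 0 and calling refcount_inc() later would
           * trigger refcount_t's increment-from-zero warning.
           */
          refcount_set(&cw->refcount, 1);
          return cw;
  }

  static void chunk_work_get(struct chunk_work *cw)
  {
          refcount_inc(&cw->refcount);    /* valid only while count >= 1 */
  }

  static void chunk_work_put(struct chunk_work *cw)
  {
          if (refcount_dec_and_test(&cw->refcount))
                  kfree(cw);
  }

dmz_queue_chunk_work() in the diff below follows this shape: it takes a
reference when a live chunk work is found in the radix tree, and creates
the work with its refcount already at 1 otherwise.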

* tag 'for-5.6/dm-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm:
  dm: bump version of core and various targets
  dm: fix congested_fn for request-based device
  dm integrity: use dm_bio_record and dm_bio_restore
  dm bio record: save/restore bi_end_io and bi_integrity
  dm zoned: Fix reference counter initial value of chunk works
  dm writecache: verify watermark during resume
  dm: report suspended device during destroy
  dm thin metadata: fix lockdep complaint
  dm cache: fix a crash due to incorrect work item cancelling
  dm integrity: fix invalid table returned due to argument count mismatch
  dm integrity: fix a deadlock due to offloading to an incorrect workqueue
  dm integrity: fix recalculation when moving from journal mode to bitmap mode
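
To make the dm-bio-record change concrete: a bio-based target that remaps a
bio and intercepts its completion must put every clobbered field back before
the original completion path runs. The extended struct dm_bio_details and
its helpers (the first diff below) now capture bi_end_io, bi_integrity and
__bi_remaining in addition to the device and iterator fields. The following
sketch shows how a target might use them; the target skeleton
(my_map/my_end_io) is hypothetical, and only dm_bio_record(),
dm_bio_restore() and struct dm_bio_details come from the patch:

  #include "dm-bio-record.h"

  #include <linux/bio.h>
  #include <linux/device-mapper.h>

  struct my_target {
          struct dm_dev *dev;                     /* opened in the ctr */
  };

  struct my_io {
          struct dm_bio_details bio_details;      /* per-bio save area */
  };

  static void my_end_io(struct bio *bio)
  {
          struct my_io *io = dm_per_bio_data(bio, sizeof(struct my_io));

          /*
           * Undo the remap: this puts back bi_disk/bi_partno, bi_iter and
           * bi_end_io, and resets __bi_remaining so that a second
           * bio_endio() on the same bio is valid.
           */
          dm_bio_restore(&io->bio_details, bio);
          bio_endio(bio);         /* now runs the original bi_end_io */
  }

  static int my_map(struct dm_target *ti, struct bio *bio)
  {
          struct my_target *mt = ti->private;
          struct my_io *io = dm_per_bio_data(bio, sizeof(struct my_io));

          dm_bio_record(&io->bio_details, bio);   /* snapshot first ... */
          bio_set_dev(bio, mt->dev->bdev);        /* ... then clobber */
          bio->bi_end_io = my_end_io;
          generic_make_request(bio);
          return DM_MAPIO_SUBMITTED;
  }

This assumes the constructor also set ti->per_io_data_size =
sizeof(struct my_io), which is what makes dm_per_bio_data() usable, the
same arrangement dm-integrity uses below.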
Parents: 8b614cb8 636be424

drivers/md/dm-bio-record.h
@@ -20,8 +20,13 @@
 struct dm_bio_details {
         struct gendisk *bi_disk;
         u8 bi_partno;
+        int __bi_remaining;
         unsigned long bi_flags;
         struct bvec_iter bi_iter;
+        bio_end_io_t *bi_end_io;
+#if defined(CONFIG_BLK_DEV_INTEGRITY)
+        struct bio_integrity_payload *bi_integrity;
+#endif
 };
 
 static inline void dm_bio_record(struct dm_bio_details *bd, struct bio *bio)
@@ -30,6 +35,11 @@ static inline void dm_bio_record(struct dm_bio_details *bd, struct bio *bio)
         bd->bi_partno = bio->bi_partno;
         bd->bi_flags = bio->bi_flags;
         bd->bi_iter = bio->bi_iter;
+        bd->__bi_remaining = atomic_read(&bio->__bi_remaining);
+        bd->bi_end_io = bio->bi_end_io;
+#if defined(CONFIG_BLK_DEV_INTEGRITY)
+        bd->bi_integrity = bio_integrity(bio);
+#endif
 }
 
 static inline void dm_bio_restore(struct dm_bio_details *bd, struct bio *bio)
@@ -38,6 +48,11 @@ static inline void dm_bio_restore(struct dm_bio_details *bd, struct bio *bio)
         bio->bi_partno = bd->bi_partno;
         bio->bi_flags = bd->bi_flags;
         bio->bi_iter = bd->bi_iter;
+        atomic_set(&bio->__bi_remaining, bd->__bi_remaining);
+        bio->bi_end_io = bd->bi_end_io;
+#if defined(CONFIG_BLK_DEV_INTEGRITY)
+        bio->bi_integrity = bd->bi_integrity;
+#endif
 }
 
 #endif

drivers/md/dm-cache-target.c
@@ -2846,8 +2846,8 @@ static void cache_postsuspend(struct dm_target *ti)
         prevent_background_work(cache);
         BUG_ON(atomic_read(&cache->nr_io_migrations));
 
-        cancel_delayed_work(&cache->waker);
-        flush_workqueue(cache->wq);
+        cancel_delayed_work_sync(&cache->waker);
+        drain_workqueue(cache->wq);
         WARN_ON(cache->tracker.in_flight);
 
         /*
@@ -3492,7 +3492,7 @@ static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits)
 static struct target_type cache_target = {
         .name = "cache",
-        .version = {2, 1, 0},
+        .version = {2, 2, 0},
         .module = THIS_MODULE,
         .ctr = cache_ctr,
         .dtr = cache_dtr,

drivers/md/dm-integrity.c
@@ -6,6 +6,8 @@
  * This file is released under the GPL.
  */
 
+#include "dm-bio-record.h"
+
 #include <linux/compiler.h>
 #include <linux/module.h>
 #include <linux/device-mapper.h>
@@ -201,17 +203,19 @@ struct dm_integrity_c {
         __u8 log2_blocks_per_bitmap_bit;
 
         unsigned char mode;
-        int suspending;
 
         int failed;
 
         struct crypto_shash *internal_hash;
 
+        struct dm_target *ti;
+
         /* these variables are locked with endio_wait.lock */
         struct rb_root in_progress;
         struct list_head wait_list;
         wait_queue_head_t endio_wait;
         struct workqueue_struct *wait_wq;
+        struct workqueue_struct *offload_wq;
 
         unsigned char commit_seq;
         commit_id_t commit_ids[N_COMMIT_IDS];
@@ -293,11 +297,7 @@ struct dm_integrity_io {
         struct completion *completion;
 
-        struct gendisk *orig_bi_disk;
-        u8 orig_bi_partno;
-        bio_end_io_t *orig_bi_end_io;
-        struct bio_integrity_payload *orig_bi_integrity;
-        struct bvec_iter orig_bi_iter;
+        struct dm_bio_details bio_details;
 };
 
 struct journal_completion {
@@ -1439,7 +1439,7 @@ static void dec_in_flight(struct dm_integrity_io *dio)
                         dio->range.logical_sector += dio->range.n_sectors;
                         bio_advance(bio, dio->range.n_sectors << SECTOR_SHIFT);
                         INIT_WORK(&dio->work, integrity_bio_wait);
-                        queue_work(ic->wait_wq, &dio->work);
+                        queue_work(ic->offload_wq, &dio->work);
                         return;
                 }
                 do_endio_flush(ic, dio);
@@ -1450,14 +1450,9 @@ static void integrity_end_io(struct bio *bio)
 {
         struct dm_integrity_io *dio = dm_per_bio_data(bio, sizeof(struct dm_integrity_io));
 
-        bio->bi_iter = dio->orig_bi_iter;
-        bio->bi_disk = dio->orig_bi_disk;
-        bio->bi_partno = dio->orig_bi_partno;
-        if (dio->orig_bi_integrity) {
-                bio->bi_integrity = dio->orig_bi_integrity;
+        dm_bio_restore(&dio->bio_details, bio);
+        if (bio->bi_integrity)
                 bio->bi_opf |= REQ_INTEGRITY;
-        }
-        bio->bi_end_io = dio->orig_bi_end_io;
 
         if (dio->completion)
                 complete(dio->completion);
@@ -1542,7 +1537,7 @@ static void integrity_metadata(struct work_struct *w)
                         }
                 }
 
-                __bio_for_each_segment(bv, bio, iter, dio->orig_bi_iter) {
+                __bio_for_each_segment(bv, bio, iter, dio->bio_details.bi_iter) {
                         unsigned pos;
                         char *mem, *checksums_ptr;
@@ -1586,7 +1581,7 @@ static void integrity_metadata(struct work_struct *w)
                 if (likely(checksums != checksums_onstack))
                         kfree(checksums);
         } else {
-                struct bio_integrity_payload *bip = dio->orig_bi_integrity;
+                struct bio_integrity_payload *bip = dio->bio_details.bi_integrity;
 
                 if (bip) {
                         struct bio_vec biv;
@@ -1865,7 +1860,7 @@ static void dm_integrity_map_continue(struct dm_integrity_io *dio, bool from_map)
         if (need_sync_io && from_map) {
                 INIT_WORK(&dio->work, integrity_bio_wait);
-                queue_work(ic->metadata_wq, &dio->work);
+                queue_work(ic->offload_wq, &dio->work);
                 return;
         }
@@ -2005,20 +2000,13 @@ static void dm_integrity_map_continue(struct dm_integrity_io *dio, bool from_map)
         } else
                 dio->completion = NULL;
 
-        dio->orig_bi_iter = bio->bi_iter;
-
-        dio->orig_bi_disk = bio->bi_disk;
-        dio->orig_bi_partno = bio->bi_partno;
+        dm_bio_record(&dio->bio_details, bio);
 
         bio_set_dev(bio, ic->dev->bdev);
-        dio->orig_bi_integrity = bio_integrity(bio);
         bio->bi_integrity = NULL;
         bio->bi_opf &= ~REQ_INTEGRITY;
-
-        dio->orig_bi_end_io = bio->bi_end_io;
         bio->bi_end_io = integrity_end_io;
         bio->bi_iter.bi_size = dio->range.n_sectors << SECTOR_SHIFT;
 
         generic_make_request(bio);
 
         if (need_sync_io) {
@@ -2315,7 +2303,7 @@ static void integrity_writer(struct work_struct *w)
         unsigned prev_free_sectors;
 
         /* the following test is not needed, but it tests the replay code */
-        if (READ_ONCE(ic->suspending) && !ic->meta_dev)
+        if (unlikely(dm_suspended(ic->ti)) && !ic->meta_dev)
                 return;
 
         spin_lock_irq(&ic->endio_wait.lock);
@@ -2376,7 +2364,7 @@ static void integrity_recalc(struct work_struct *w)
 next_chunk:
 
-        if (unlikely(READ_ONCE(ic->suspending)))
+        if (unlikely(dm_suspended(ic->ti)))
                 goto unlock_ret;
 
         range.logical_sector = le64_to_cpu(ic->sb->recalc_sector);
@@ -2501,7 +2489,7 @@ static void bitmap_block_work(struct work_struct *w)
                                     dio->range.n_sectors, BITMAP_OP_TEST_ALL_SET)) {
                         remove_range(ic, &dio->range);
                         INIT_WORK(&dio->work, integrity_bio_wait);
-                        queue_work(ic->wait_wq, &dio->work);
+                        queue_work(ic->offload_wq, &dio->work);
                 } else {
                         block_bitmap_op(ic, ic->journal, dio->range.logical_sector,
                                         dio->range.n_sectors, BITMAP_OP_SET);
@@ -2524,7 +2512,7 @@ static void bitmap_block_work(struct work_struct *w)
                         remove_range(ic, &dio->range);
                         INIT_WORK(&dio->work, integrity_bio_wait);
-                        queue_work(ic->wait_wq, &dio->work);
+                        queue_work(ic->offload_wq, &dio->work);
                 }
 
         queue_delayed_work(ic->commit_wq, &ic->bitmap_flush_work, ic->bitmap_flush_interval);
@@ -2804,8 +2792,6 @@ static void dm_integrity_postsuspend(struct dm_target *ti)
         del_timer_sync(&ic->autocommit_timer);
 
-        WRITE_ONCE(ic->suspending, 1);
-
         if (ic->recalc_wq)
                 drain_workqueue(ic->recalc_wq);
@@ -2834,8 +2820,6 @@ static void dm_integrity_postsuspend(struct dm_target *ti)
 #endif
         }
 
-        WRITE_ONCE(ic->suspending, 0);
-
         BUG_ON(!RB_EMPTY_ROOT(&ic->in_progress));
 
         ic->journal_uptodate = true;
@@ -2888,17 +2872,24 @@ static void dm_integrity_resume(struct dm_target *ti)
         } else {
                 replay_journal(ic);
                 if (ic->mode == 'B') {
-                        int mode;
                         ic->sb->flags |= cpu_to_le32(SB_FLAG_DIRTY_BITMAP);
                         ic->sb->log2_blocks_per_bitmap_bit = ic->log2_blocks_per_bitmap_bit;
                         r = sync_rw_sb(ic, REQ_OP_WRITE, REQ_FUA);
                         if (unlikely(r))
                                 dm_integrity_io_error(ic, "writing superblock", r);
-                        mode = ic->recalculate_flag ? BITMAP_OP_SET : BITMAP_OP_CLEAR;
-                        block_bitmap_op(ic, ic->journal, 0, ic->provided_data_sectors, mode);
-                        block_bitmap_op(ic, ic->recalc_bitmap, 0, ic->provided_data_sectors, mode);
-                        block_bitmap_op(ic, ic->may_write_bitmap, 0, ic->provided_data_sectors, mode);
+                        block_bitmap_op(ic, ic->journal, 0, ic->provided_data_sectors, BITMAP_OP_CLEAR);
+                        block_bitmap_op(ic, ic->recalc_bitmap, 0, ic->provided_data_sectors, BITMAP_OP_CLEAR);
+                        block_bitmap_op(ic, ic->may_write_bitmap, 0, ic->provided_data_sectors, BITMAP_OP_CLEAR);
+                        if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING) &&
+                            le64_to_cpu(ic->sb->recalc_sector) < ic->provided_data_sectors) {
+                                block_bitmap_op(ic, ic->journal, le64_to_cpu(ic->sb->recalc_sector),
+                                                ic->provided_data_sectors - le64_to_cpu(ic->sb->recalc_sector), BITMAP_OP_SET);
+                                block_bitmap_op(ic, ic->recalc_bitmap, le64_to_cpu(ic->sb->recalc_sector),
+                                                ic->provided_data_sectors - le64_to_cpu(ic->sb->recalc_sector), BITMAP_OP_SET);
+                                block_bitmap_op(ic, ic->may_write_bitmap, le64_to_cpu(ic->sb->recalc_sector),
+                                                ic->provided_data_sectors - le64_to_cpu(ic->sb->recalc_sector), BITMAP_OP_SET);
+                        }
                         rw_journal_sectors(ic, REQ_OP_WRITE, REQ_FUA | REQ_SYNC, 0,
                                            ic->n_bitmap_blocks * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), NULL);
                 }
@@ -2967,7 +2958,7 @@ static void dm_integrity_status(struct dm_target *ti, status_type_t type, unsigned status_flags, char *result, unsigned maxlen)
                         DMEMIT(" meta_device:%s", ic->meta_dev->name);
                 if (ic->sectors_per_block != 1)
                         DMEMIT(" block_size:%u", ic->sectors_per_block << SECTOR_SHIFT);
-                if (ic->recalculate_flag)
+                if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING))
                         DMEMIT(" recalculate");
                 DMEMIT(" journal_sectors:%u", ic->initial_sectors - SB_SECTORS);
                 DMEMIT(" interleave_sectors:%u", 1U << ic->sb->log2_interleave_sectors);
@@ -3623,6 +3614,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
         }
         ti->private = ic;
         ti->per_io_data_size = sizeof(struct dm_integrity_io);
+        ic->ti = ti;
 
         ic->in_progress = RB_ROOT;
         INIT_LIST_HEAD(&ic->wait_list);
@@ -3836,6 +3828,14 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
                 goto bad;
         }
 
+        ic->offload_wq = alloc_workqueue("dm-integrity-offload", WQ_MEM_RECLAIM,
+                                         METADATA_WORKQUEUE_MAX_ACTIVE);
+        if (!ic->offload_wq) {
+                ti->error = "Cannot allocate workqueue";
+                r = -ENOMEM;
+                goto bad;
+        }
+
         ic->commit_wq = alloc_workqueue("dm-integrity-commit", WQ_MEM_RECLAIM, 1);
         if (!ic->commit_wq) {
                 ti->error = "Cannot allocate workqueue";
@@ -4140,6 +4140,8 @@ static void dm_integrity_dtr(struct dm_target *ti)
                 destroy_workqueue(ic->metadata_wq);
         if (ic->wait_wq)
                 destroy_workqueue(ic->wait_wq);
+        if (ic->offload_wq)
+                destroy_workqueue(ic->offload_wq);
         if (ic->commit_wq)
                 destroy_workqueue(ic->commit_wq);
         if (ic->writer_wq)
@@ -4200,7 +4202,7 @@ static void dm_integrity_dtr(struct dm_target *ti)
 static struct target_type integrity_target = {
         .name = "integrity",
-        .version = {1, 4, 0},
+        .version = {1, 5, 0},
         .module = THIS_MODULE,
         .features = DM_TARGET_SINGLETON | DM_TARGET_INTEGRITY,
         .ctr = dm_integrity_ctr,

drivers/md/dm-mpath.c
@@ -2053,7 +2053,7 @@ static int multipath_busy(struct dm_target *ti)
  *---------------------------------------------------------------*/
 static struct target_type multipath_target = {
         .name = "multipath",
-        .version = {1, 13, 0},
+        .version = {1, 14, 0},
         .features = DM_TARGET_SINGLETON | DM_TARGET_IMMUTABLE |
                     DM_TARGET_PASSES_INTEGRITY,
         .module = THIS_MODULE,

drivers/md/dm-thin-metadata.c
@@ -960,9 +960,9 @@ int dm_pool_metadata_close(struct dm_pool_metadata *pmd)
                 DMWARN("%s: __commit_transaction() failed, error = %d",
                        __func__, r);
         }
+        pmd_write_unlock(pmd);
         if (!pmd->fail_io)
                 __destroy_persistent_data_objects(pmd);
-        pmd_write_unlock(pmd);
 
         kfree(pmd);
         return 0;

drivers/md/dm-verity-target.c
@@ -1221,7 +1221,7 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv)
 static struct target_type verity_target = {
         .name = "verity",
-        .version = {1, 5, 0},
+        .version = {1, 6, 0},
         .module = THIS_MODULE,
         .ctr = verity_ctr,
         .dtr = verity_dtr,

drivers/md/dm-writecache.c
@@ -625,6 +625,12 @@ static void writecache_add_to_freelist(struct dm_writecache *wc, struct wc_entry *e)
         wc->freelist_size++;
 }
 
+static inline void writecache_verify_watermark(struct dm_writecache *wc)
+{
+        if (unlikely(wc->freelist_size + wc->writeback_size <= wc->freelist_high_watermark))
+                queue_work(wc->writeback_wq, &wc->writeback_work);
+}
+
 static struct wc_entry *writecache_pop_from_freelist(struct dm_writecache *wc, sector_t expected_sector)
 {
         struct wc_entry *e;
@@ -650,8 +656,8 @@ static struct wc_entry *writecache_pop_from_freelist(struct dm_writecache *wc, sector_t expected_sector)
                 list_del(&e->lru);
         }
         wc->freelist_size--;
-        if (unlikely(wc->freelist_size + wc->writeback_size <= wc->freelist_high_watermark))
-                queue_work(wc->writeback_wq, &wc->writeback_work);
+
+        writecache_verify_watermark(wc);
 
         return e;
 }
@@ -842,7 +848,7 @@ static void writecache_suspend(struct dm_target *ti)
         }
         wc_unlock(wc);
 
-        flush_workqueue(wc->writeback_wq);
+        drain_workqueue(wc->writeback_wq);
 
         wc_lock(wc);
         if (flush_on_suspend)
@@ -965,6 +971,8 @@ static void writecache_resume(struct dm_target *ti)
                 writecache_commit_flushed(wc, false);
         }
 
+        writecache_verify_watermark(wc);
+
         wc_unlock(wc);
 }
@@ -2312,7 +2320,7 @@ static void writecache_status(struct dm_target *ti, status_type_t type, unsigned status_flags, char *result, unsigned maxlen)
 static struct target_type writecache_target = {
         .name = "writecache",
-        .version = {1, 1, 1},
+        .version = {1, 2, 0},
         .module = THIS_MODULE,
         .ctr = writecache_ctr,
         .dtr = writecache_dtr,

drivers/md/dm-zoned-target.c
@@ -533,8 +533,9 @@ static int dmz_queue_chunk_work(struct dmz_target *dmz, struct bio *bio)
 
         /* Get the BIO chunk work. If one is not active yet, create one */
         cw = radix_tree_lookup(&dmz->chunk_rxtree, chunk);
-        if (!cw) {
-
+        if (cw) {
+                dmz_get_chunk_work(cw);
+        } else {
                 /* Create a new chunk work */
                 cw = kmalloc(sizeof(struct dm_chunk_work), GFP_NOIO);
                 if (unlikely(!cw)) {
@@ -543,7 +544,7 @@ static int dmz_queue_chunk_work(struct dmz_target *dmz, struct bio *bio)
                 }
 
                 INIT_WORK(&cw->work, dmz_chunk_work);
-                refcount_set(&cw->refcount, 0);
+                refcount_set(&cw->refcount, 1);
                 cw->target = dmz;
                 cw->chunk = chunk;
                 bio_list_init(&cw->bio_list);
@@ -556,7 +557,6 @@ static int dmz_queue_chunk_work(struct dmz_target *dmz, struct bio *bio)
         }
 
         bio_list_add(&cw->bio_list, bio);
-        dmz_get_chunk_work(cw);
 
         dmz_reclaim_bio_acc(dmz->reclaim);
         if (queue_work(dmz->chunk_wq, &cw->work))
@@ -967,7 +967,7 @@ static int dmz_iterate_devices(struct dm_target *ti, iterate_devices_callout_fn fn, void *data)
 static struct target_type dmz_type = {
         .name = "zoned",
-        .version = {1, 0, 0},
+        .version = {1, 1, 0},
         .features = DM_TARGET_SINGLETON | DM_TARGET_ZONED_HM,
         .module = THIS_MODULE,
         .ctr = dmz_ctr,

drivers/md/dm.c
@@ -1788,7 +1788,8 @@ static int dm_any_congested(void *congested_data, int bdi_bits)
                  * With request-based DM we only need to check the
                  * top-level queue for congestion.
                  */
-                r = md->queue->backing_dev_info->wb.state & bdi_bits;
+                struct backing_dev_info *bdi = md->queue->backing_dev_info;
+                r = bdi->wb.congested->state & bdi_bits;
         } else {
                 map = dm_get_live_table_fast(md);
                 if (map)
@@ -1854,15 +1855,6 @@ static const struct dax_operations dm_dax_ops;
 
 static void dm_wq_work(struct work_struct *work);
 
-static void dm_init_normal_md_queue(struct mapped_device *md)
-{
-        /*
-         * Initialize aspects of queue that aren't relevant for blk-mq
-         */
-        md->queue->backing_dev_info->congested_data = md;
-        md->queue->backing_dev_info->congested_fn = dm_any_congested;
-}
-
 static void cleanup_mapped_device(struct mapped_device *md)
 {
         if (md->wq)
@@ -2249,6 +2241,12 @@ struct queue_limits *dm_get_queue_limits(struct mapped_device *md)
 }
 EXPORT_SYMBOL_GPL(dm_get_queue_limits);
 
+static void dm_init_congested_fn(struct mapped_device *md)
+{
+        md->queue->backing_dev_info->congested_data = md;
+        md->queue->backing_dev_info->congested_fn = dm_any_congested;
+}
+
 /*
  * Setup the DM device's queue based on md's type
  */
@@ -2265,11 +2263,12 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t)
                         DMERR("Cannot initialize queue for request-based dm-mq mapped device");
                         return r;
                 }
+                dm_init_congested_fn(md);
                 break;
         case DM_TYPE_BIO_BASED:
         case DM_TYPE_DAX_BIO_BASED:
         case DM_TYPE_NVME_BIO_BASED:
-                dm_init_normal_md_queue(md);
+                dm_init_congested_fn(md);
                 break;
         case DM_TYPE_NONE:
                 WARN_ON_ONCE(true);
@@ -2368,6 +2367,7 @@ static void __dm_destroy(struct mapped_device *md, bool wait)
         map = dm_get_live_table(md, &srcu_idx);
         if (!dm_suspended_md(md)) {
                 dm_table_presuspend_targets(map);
+                set_bit(DMF_SUSPENDED, &md->flags);
                 dm_table_postsuspend_targets(map);
         }
         /* dm_put_live_table must be before msleep, otherwise deadlock is possible */

include/uapi/linux/dm-ioctl.h
@@ -272,9 +272,9 @@ enum {
 #define DM_DEV_SET_GEOMETRY     _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl)
 
 #define DM_VERSION_MAJOR        4
-#define DM_VERSION_MINOR        41
+#define DM_VERSION_MINOR        42
 #define DM_VERSION_PATCHLEVEL   0
-#define DM_VERSION_EXTRA        "-ioctl (2019-09-16)"
+#define DM_VERSION_EXTRA        "-ioctl (2020-02-27)"
 
 /* Status bits */
 #define DM_READONLY_FLAG        (1 << 0) /* In/Out */