Merge tag 'dm-4.8-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm

Pull device mapper updates from Mike Snitzer: - initially based on Jens' 'for-4.8/core' (given all the flag churn) and later merged with 'for-4.8/core' to pickup the QUEUE_FLAG_DAX commits that DM depends on to provide its DAX support - clean up the bio-based vs request-based DM core code by moving the request-based DM core code out to dm-rq.[hc] - reinstate bio-based support in the DM multipath target (done with the idea that fast storage like NVMe over Fabrics could benefit) -- while preserving support for request_fn and blk-mq request-based DM mpath - SCSI and DM multipath persistent reservation fixes that were coordinated with Martin Petersen. - the DM raid target saw the most extensive change this cycle; it now provides reshape and takeover support (by layering ontop of the corresponding MD capabilities) - DAX support for DM core and the linear, stripe and error targets - a DM thin-provisioning block discard vs allocation race fix that addresses potential for corruption - a stable fix for DM verity-fec's block calculation during decode - a few cleanups and fixes to DM core and various targets * tag 'dm-4.8-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm: (73 commits) dm: allow bio-based table to be upgraded to bio-based with DAX support dm snap: add fake origin_direct_access dm stripe: add DAX support dm error: add DAX support dm linear: add DAX support dm: add infrastructure for DAX support dm thin: fix a race condition between discarding and provisioning a block dm btree: fix a bug in dm_btree_find_next_single() dm raid: fix random optimal_io_size for raid0 dm raid: address checkpatch.pl complaints dm: call PR reserve/unreserve on each underlying device sd: don't use the ALL_TG_PT bit for reservations dm: fix second blk_delay_queue() parameter to be in msec units not jiffies dm raid: change logical functions to actually return bool dm raid: use rdev_for_each in status dm raid: use rs->raid_disks to avoid memory leaks on free dm raid: support delta_disks for raid1, fix table output dm raid: enhance reshape check and factor out reshape setup dm raid: allow resize during recovery dm raid: fix rs_is_recovering() to allow for lvextend ...

Merge tag 'dm-4.8-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm
Pull device mapper updates from Mike Snitzer: - initially based on Jens' 'for-4.8/core' (given all the flag churn) and later merged with 'for-4.8/core' to pickup the QUEUE_FLAG_DAX commits that DM depends on to provide its DAX support - clean up the bio-based vs request-based DM core code by moving the request-based DM core code out to dm-rq.[hc] - reinstate bio-based support in the DM multipath target (done with the idea that fast storage like NVMe over Fabrics could benefit) -- while preserving support for request_fn and blk-mq request-based DM mpath - SCSI and DM multipath persistent reservation fixes that were coordinated with Martin Petersen. - the DM raid target saw the most extensive change this cycle; it now provides reshape and takeover support (by layering ontop of the corresponding MD capabilities) - DAX support for DM core and the linear, stripe and error targets - a DM thin-provisioning block discard vs allocation race fix that addresses potential for corruption - a stable fix for DM verity-fec's block calculation during decode - a few cleanups and fixes to DM core and various targets * tag 'dm-4.8-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm: (73 commits) dm: allow bio-based table to be upgraded to bio-based with DAX support dm snap: add fake origin_direct_access dm stripe: add DAX support dm error: add DAX support dm linear: add DAX support dm: add infrastructure for DAX support dm thin: fix a race condition between discarding and provisioning a block dm btree: fix a bug in dm_btree_find_next_single() dm raid: fix random optimal_io_size for raid0 dm raid: address checkpatch.pl complaints dm: call PR reserve/unreserve on each underlying device sd: don't use the ALL_TG_PT bit for reservations dm: fix second blk_delay_queue() parameter to be in msec units not jiffies dm raid: change logical functions to actually return bool dm raid: use rdev_for_each in status dm raid: use rs->raid_disks to avoid memory leaks on free dm raid: support delta_disks for raid1, fix table output dm raid: enhance reshape check and factor out reshape setup dm raid: allow resize during recovery dm raid: fix rs_is_recovering() to allow for lvextend ...
f7e68169 · Linus Torvalds · 3fc9d690 · b5ab4a9b · f7e68169 · f7e68169
Commit f7e68169 authored Jul 26, 2016 by Linus Torvalds
29 changed files
--- a/Documentation/device-mapper/dm-raid.txt
+++ b/Documentation/device-mapper/dm-raid.txt
@@ -14,8 +14,12 @@ The target is named "raid" and it accepts the following parameters:
    <#raid_devs> <metadata_dev0> <dev0> [.. <metadata_devN> <devN>]

 <raid_type>:
+  raid0		RAID0 striping (no resilience)
  raid1		RAID1 mirroring
-  raid4		RAID4 dedicated parity disk
+  raid4		RAID4 with dedicated last parity disk
+  raid5_n 	RAID5 with dedicated last parity disk suporting takeover
+		Same as raid4
+		-Transitory layout
  raid5_la	RAID5 left asymmetric
 		- rotating parity 0 with data continuation
  raid5_ra	RAID5 right asymmetric
@@ -30,7 +34,19 @@ The target is named "raid" and it accepts the following parameters:
 		- rotating parity N (right-to-left) with data restart
  raid6_nc	RAID6 N continue
 		- rotating parity N (right-to-left) with data continuation
+  raid6_n_6	RAID6 with dedicate parity disks
+		- parity and Q-syndrome on the last 2 disks;
+		  laylout for takeover from/to raid4/raid5_n
+  raid6_la_6	Same as "raid_la" plus dedicated last Q-syndrome disk
+		- layout for takeover from raid5_la from/to raid6
+  raid6_ra_6	Same as "raid5_ra" dedicated last Q-syndrome disk
+		- layout for takeover from raid5_ra from/to raid6
+  raid6_ls_6	Same as "raid5_ls" dedicated last Q-syndrome disk
+		- layout for takeover from raid5_ls from/to raid6
+  raid6_rs_6	Same as "raid5_rs" dedicated last Q-syndrome disk
+		- layout for takeover from raid5_rs from/to raid6
  raid10        Various RAID10 inspired algorithms chosen by additional params
+		(see raid10_format and raid10_copies below)
 		- RAID10: Striped Mirrors (aka 'Striping on top of mirrors')
 		- RAID1E: Integrated Adjacent Stripe Mirroring
 		- RAID1E: Integrated Offset Stripe Mirroring
@@ -116,10 +132,41 @@ The target is named "raid" and it accepts the following parameters:
 		Here we see layouts closely akin to 'RAID1E - Integrated
 		Offset Stripe Mirroring'.

+        [delta_disks <N>]
+		The delta_disks option value (-251 < N < +251) triggers
+		device removal (negative value) or device addition (positive
+		value) to any reshape supporting raid levels 4/5/6 and 10.
+		RAID levels 4/5/6 allow for addition of devices (metadata
+		and data device tupel), raid10_near and raid10_offset only
+		allow for device addtion. raid10_far does not support any
+		reshaping at all.
+		A minimum of devices have to be kept to enforce resilience,
+		which is 3 devices for raid4/5 and 4 devices for raid6.
+
+        [data_offset <sectors>]
+		This option value defines the offset into each data device
+		where the data starts. This is used to provide out-of-place
+		reshaping space to avoid writing over data whilst
+		changing the layout of stripes, hence an interruption/crash
+		may happen at any time without the risk of losing data.
+		E.g. when adding devices to an existing raid set during
+		forward reshaping, the out-of-place space will be allocated
+		at the beginning of each raid device. The kernel raid4/5/6/10
+		MD personalities supporting such device addition will read the data from
+		the existing first stripes (those with smaller number of stripes)
+		starting at data_offset to fill up a new stripe with the larger
+		number of stripes, calculate the redundancy blocks (CRC/Q-syndrome)
+		and write that new stripe to offset 0. Same will be applied to all
+		N-1 other new stripes. This out-of-place scheme is used to change
+		the RAID type (i.e. the allocation algorithm) as well, e.g.
+		changing from raid5_ls to raid5_n.
+
 <#raid_devs>: The number of devices composing the array.
 	Each device consists of two entries.  The first is the device
 	containing the metadata (if any); the second is the one containing the
-	data.
+	data. A Maximum of 64 metadata/data device entries are supported
+	up to target version 1.8.0.
+	1.9.0 supports up to 253 which is enforced by the used MD kernel runtime.

 	If a drive has failed or is missing at creation time, a '-' can be
 	given for both the metadata and data drives for a given position.
@@ -207,7 +254,6 @@ include:
 	"recover"- Initiate/continue a recover process.
 	"check"  - Initiate a check (i.e. a "scrub") of the array.
 	"repair" - Initiate a repair of the array.
-	"reshape"- Currently unsupported (-EINVAL).


 Discard Support
@@ -257,3 +303,9 @@ Version History
 1.5.2   'mismatch_cnt' is zero unless [last_]sync_action is "check".
 1.6.0   Add discard support (and devices_handle_discard_safely module param).
 1.7.0   Add support for MD RAID0 mappings.
+1.8.0   Explictely check for compatible flags in the superblock metadata
+	and reject to start the raid set if any are set by a newer
+	target version, thus avoiding data corruption on a raid set
+	with a reshape in progress.
+1.9.0   Add support for RAID level takeover/reshape/region size
+	and set size reduction.
--- a/drivers/md/Makefile
+++ b/drivers/md/Makefile
@@ -3,7 +3,8 @@
 #

 dm-mod-y	+= dm.o dm-table.o dm-target.o dm-linear.o dm-stripe.o \
-		   dm-ioctl.o dm-io.o dm-kcopyd.o dm-sysfs.o dm-stats.o
+		   dm-ioctl.o dm-io.o dm-kcopyd.o dm-sysfs.o dm-stats.o \
+		   dm-rq.o
 dm-multipath-y	+= dm-path-selector.o dm-mpath.o
 dm-snapshot-y	+= dm-snap.o dm-exception-store.o dm-snap-transient.o \
 		    dm-snap-persistent.o

--- a/drivers/md/dm-builtin.c
+++ b/drivers/md/dm-builtin.c
-#include "dm.h"
+#include "dm-core.h"

 /*
 * The kobject release method must not be placed in the module itself,

--- a/drivers/md/dm-core.h
+++ b/drivers/md/dm-core.h
+/*
+ * Internal header file _only_ for device mapper core
+ *
+ * Copyright (C) 2016 Red Hat, Inc. All rights reserved.
+ *
+ * This file is released under the LGPL.
+ */
+
+#ifndef DM_CORE_INTERNAL_H
+#define DM_CORE_INTERNAL_H
+
+#include <linux/kthread.h>
+#include <linux/ktime.h>
+#include <linux/blk-mq.h>
+
+#include <trace/events/block.h>
+
+#include "dm.h"
+
+#define DM_RESERVED_MAX_IOS		1024
+
+struct dm_kobject_holder {
+	struct kobject kobj;
+	struct completion completion;
+};
+
+/*
+ * DM core internal structure that used directly by dm.c and dm-rq.c
+ * DM targets must _not_ deference a mapped_device to directly access its members!
+ */
+struct mapped_device {
+	struct srcu_struct io_barrier;
+	struct mutex suspend_lock;
+
+	/*
+	 * The current mapping (struct dm_table *).
+	 * Use dm_get_live_table{_fast} or take suspend_lock for
+	 * dereference.
+	 */
+	void __rcu *map;
+
+	struct list_head table_devices;
+	struct mutex table_devices_lock;
+
+	unsigned long flags;
+
+	struct request_queue *queue;
+	int numa_node_id;
+
+	unsigned type;
+	/* Protect queue and type against concurrent access. */
+	struct mutex type_lock;
+
+	atomic_t holders;
+	atomic_t open_count;
+
+	struct dm_target *immutable_target;
+	struct target_type *immutable_target_type;
+
+	struct gendisk *disk;
+	char name[16];
+
+	void *interface_ptr;
+
+	/*
+	 * A list of ios that arrived while we were suspended.
+	 */
+	atomic_t pending[2];
+	wait_queue_head_t wait;
+	struct work_struct work;
+	spinlock_t deferred_lock;
+	struct bio_list deferred;
+
+	/*
+	 * Event handling.
+	 */
+	wait_queue_head_t eventq;
+	atomic_t event_nr;
+	atomic_t uevent_seq;
+	struct list_head uevent_list;
+	spinlock_t uevent_lock; /* Protect access to uevent_list */
+
+	/* the number of internal suspends */
+	unsigned internal_suspend_count;
+
+	/*
+	 * Processing queue (flush)
+	 */
+	struct workqueue_struct *wq;
+
+	/*
+	 * io objects are allocated from here.
+	 */
+	mempool_t *io_pool;
+	mempool_t *rq_pool;
+
+	struct bio_set *bs;
+
+	/*
+	 * freeze/thaw support require holding onto a super block
+	 */
+	struct super_block *frozen_sb;
+
+	/* forced geometry settings */
+	struct hd_geometry geometry;
+
+	struct block_device *bdev;
+
+	/* kobject and completion */
+	struct dm_kobject_holder kobj_holder;
+
+	/* zero-length flush that will be cloned and submitted to targets */
+	struct bio flush_bio;
+
+	struct dm_stats stats;
+
+	struct kthread_worker kworker;
+	struct task_struct *kworker_task;
+
+	/* for request-based merge heuristic in dm_request_fn() */
+	unsigned seq_rq_merge_deadline_usecs;
+	int last_rq_rw;
+	sector_t last_rq_pos;
+	ktime_t last_rq_start_time;
+
+	/* for blk-mq request-based DM support */
+	struct blk_mq_tag_set *tag_set;
+	bool use_blk_mq:1;
+	bool init_tio_pdu:1;
+};
+
+void dm_init_md_queue(struct mapped_device *md);
+void dm_init_normal_md_queue(struct mapped_device *md);
+int md_in_flight(struct mapped_device *md);
+void disable_write_same(struct mapped_device *md);
+
+static inline struct completion *dm_get_completion_from_kobject(struct kobject *kobj)
+{
+	return &container_of(kobj, struct dm_kobject_holder, kobj)->completion;
+}
+
+unsigned __dm_get_module_param(unsigned *module_param, unsigned def, unsigned max);
+
+static inline bool dm_message_test_buffer_overflow(char *result, unsigned maxlen)
+{
+	return !maxlen || strlen(result) + 1 >= maxlen;
+}
+
+#endif
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -683,7 +683,7 @@ static int crypt_iv_tcw_whitening(struct crypt_config *cc,
 				  u8 *data)
 {
 	struct iv_tcw_private *tcw = &cc->iv_gen_private.tcw;
-	u64 sector = cpu_to_le64((u64)dmreq->iv_sector);
+	__le64 sector = cpu_to_le64(dmreq->iv_sector);
 	u8 buf[TCW_WHITENING_SIZE];
 	SHASH_DESC_ON_STACK(desc, tcw->crc32_tfm);
 	int i, r;
@@ -722,7 +722,7 @@ static int crypt_iv_tcw_gen(struct crypt_config *cc, u8 *iv,
 			    struct dm_crypt_request *dmreq)
 {
 	struct iv_tcw_private *tcw = &cc->iv_gen_private.tcw;
-	u64 sector = cpu_to_le64((u64)dmreq->iv_sector);
+	__le64 sector = cpu_to_le64(dmreq->iv_sector);
 	u8 *src;
 	int r = 0;


--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -5,7 +5,7 @@
 * This file is released under the GPL.
 */

-#include "dm.h"
+#include "dm-core.h"

 #include <linux/device-mapper.h>


--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -5,7 +5,7 @@
 * This file is released under the GPL.
 */

-#include "dm.h"
+#include "dm-core.h"

 #include <linux/module.h>
 #include <linux/vmalloc.h>
@@ -1267,6 +1267,15 @@ static int populate_table(struct dm_table *table,
 	return dm_table_complete(table);
 }

+static bool is_valid_type(unsigned cur, unsigned new)
+{
+	if (cur == new ||
+	    (cur == DM_TYPE_BIO_BASED && new == DM_TYPE_DAX_BIO_BASED))
+		return true;
+
+	return false;
+}
+
 static int table_load(struct dm_ioctl *param, size_t param_size)
 {
 	int r;
@@ -1309,7 +1318,7 @@ static int table_load(struct dm_ioctl *param, size_t param_size)
 			DMWARN("unable to set up device queue for new table.");
 			goto err_unlock_md_type;
 		}
-	} else if (dm_get_md_type(md) != dm_table_get_type(t)) {
+	} else if (!is_valid_type(dm_get_md_type(md), dm_table_get_type(t))) {
 		DMWARN("can't change device type after initial table load.");
 		r = -EINVAL;
 		goto err_unlock_md_type;
@@ -1670,8 +1679,7 @@ static int check_version(unsigned int cmd, struct dm_ioctl __user *user)
 	return r;
 }

-#define DM_PARAMS_KMALLOC	0x0001	/* Params alloced with kmalloc */
-#define DM_PARAMS_VMALLOC	0x0002	/* Params alloced with vmalloc */
+#define DM_PARAMS_MALLOC	0x0001	/* Params allocated with kvmalloc() */
 #define DM_WIPE_BUFFER		0x0010	/* Wipe input buffer before returning from ioctl */

 static void free_params(struct dm_ioctl *param, size_t param_size, int param_flags)
@@ -1679,10 +1687,8 @@ static void free_params(struct dm_ioctl *param, size_t param_size, int param_fla
 	if (param_flags & DM_WIPE_BUFFER)
 		memset(param, 0, param_size);

-	if (param_flags & DM_PARAMS_KMALLOC)
-		kfree(param);
-	if (param_flags & DM_PARAMS_VMALLOC)
-		vfree(param);
+	if (param_flags & DM_PARAMS_MALLOC)
+		kvfree(param);
 }

 static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl *param_kernel,
@@ -1714,19 +1720,14 @@ static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl *param_kern
 	 * Use kmalloc() rather than vmalloc() when we can.
 	 */
 	dmi = NULL;
-	if (param_kernel->data_size <= KMALLOC_MAX_SIZE) {
+	if (param_kernel->data_size <= KMALLOC_MAX_SIZE)
 		dmi = kmalloc(param_kernel->data_size, GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN);
-		if (dmi)
-			*param_flags |= DM_PARAMS_KMALLOC;
-	}

 	if (!dmi) {
 		unsigned noio_flag;
 		noio_flag = memalloc_noio_save();
 		dmi = __vmalloc(param_kernel->data_size, GFP_NOIO | __GFP_HIGH | __GFP_HIGHMEM, PAGE_KERNEL);
 		memalloc_noio_restore(noio_flag);
-		if (dmi)
-			*param_flags |= DM_PARAMS_VMALLOC;
 	}

 	if (!dmi) {
@@ -1735,6 +1736,8 @@ static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl *param_kern
 		return -ENOMEM;
 	}

+	*param_flags |= DM_PARAMS_MALLOC;
+
 	if (copy_from_user(dmi, user, param_kernel->data_size))
 		goto bad;


--- a/drivers/md/dm-kcopyd.c
+++ b/drivers/md/dm-kcopyd.c
@@ -26,7 +26,7 @@
 #include <linux/device-mapper.h>
 #include <linux/dm-kcopyd.h>

-#include "dm.h"
+#include "dm-core.h"

 #define SUB_JOB_SIZE	128
 #define SPLIT_COUNT	8

--- a/drivers/md/dm-linear.c
+++ b/drivers/md/dm-linear.c
@@ -141,9 +141,27 @@ static int linear_iterate_devices(struct dm_target *ti,
 	return fn(ti, lc->dev, lc->start, ti->len, data);
 }

+static long linear_direct_access(struct dm_target *ti, sector_t sector,
+				 void __pmem **kaddr, pfn_t *pfn, long size)
+{
+	struct linear_c *lc = ti->private;
+	struct block_device *bdev = lc->dev->bdev;
+	struct blk_dax_ctl dax = {
+		.sector = linear_map_sector(ti, sector),
+		.size = size,
+	};
+	long ret;
+
+	ret = bdev_direct_access(bdev, &dax);
+	*kaddr = dax.addr;
+	*pfn = dax.pfn;
+
+	return ret;
+}
+
 static struct target_type linear_target = {
 	.name   = "linear",
-	.version = {1, 2, 1},
+	.version = {1, 3, 0},
 	.module = THIS_MODULE,
 	.ctr    = linear_ctr,
 	.dtr    = linear_dtr,
@@ -151,6 +169,7 @@ static struct target_type linear_target = {
 	.status = linear_status,
 	.prepare_ioctl = linear_prepare_ioctl,
 	.iterate_devices = linear_iterate_devices,
+	.direct_access = linear_direct_access,
 };

 int __init dm_linear_init(void)

--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
--- a/drivers/md/dm-rq.h
+++ b/drivers/md/dm-rq.h
+/*
+ * Internal header file for device mapper
+ *
+ * Copyright (C) 2016 Red Hat, Inc. All rights reserved.
+ *
+ * This file is released under the LGPL.
+ */
+
+#ifndef DM_RQ_INTERNAL_H
+#define DM_RQ_INTERNAL_H
+
+#include <linux/bio.h>
+#include <linux/kthread.h>
+
+#include "dm-stats.h"
+
+struct mapped_device;
+
+/*
+ * One of these is allocated per request.
+ */
+struct dm_rq_target_io {
+	struct mapped_device *md;
+	struct dm_target *ti;
+	struct request *orig, *clone;
+	struct kthread_work work;
+	int error;
+	union map_info info;
+	struct dm_stats_aux stats_aux;
+	unsigned long duration_jiffies;
+	unsigned n_sectors;
+};
+
+/*
+ * For request-based dm - the bio clones we allocate are embedded in these
+ * structs.
+ *
+ * We allocate these with bio_alloc_bioset, using the front_pad parameter when
+ * the bioset is created - this means the bio has to come at the end of the
+ * struct.
+ */
+struct dm_rq_clone_bio_info {
+	struct bio *orig;
+	struct dm_rq_target_io *tio;
+	struct bio clone;
+};
+
+bool dm_use_blk_mq_default(void);
+bool dm_use_blk_mq(struct mapped_device *md);
+
+int dm_old_init_request_queue(struct mapped_device *md);
+int dm_mq_init_request_queue(struct mapped_device *md, struct dm_table *t);
+void dm_mq_cleanup_mapped_device(struct mapped_device *md);
+
+void dm_start_queue(struct request_queue *q);
+void dm_stop_queue(struct request_queue *q);
+
+unsigned dm_get_reserved_rq_based_ios(void);
+
+ssize_t dm_attr_rq_based_seq_io_merge_deadline_show(struct mapped_device *md, char *buf);
+ssize_t dm_attr_rq_based_seq_io_merge_deadline_store(struct mapped_device *md,
+						     const char *buf, size_t count);
+
+#endif
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -2302,6 +2302,13 @@ static int origin_map(struct dm_target *ti, struct bio *bio)
 	return do_origin(o->dev, bio);
 }

+static long origin_direct_access(struct dm_target *ti, sector_t sector,
+		void __pmem **kaddr, pfn_t *pfn, long size)
+{
+	DMWARN("device does not support dax.");
+	return -EIO;
+}
+
 /*
 * Set the target "max_io_len" field to the minimum of all the snapshots'
 * chunk sizes.
@@ -2361,6 +2368,7 @@ static struct target_type origin_target = {
 	.postsuspend = origin_postsuspend,
 	.status  = origin_status,
 	.iterate_devices = origin_iterate_devices,
+	.direct_access = origin_direct_access,
 };

 static struct target_type snapshot_target = {

--- a/drivers/md/dm-stats.c
+++ b/drivers/md/dm-stats.c
@@ -10,7 +10,7 @@
 #include <linux/module.h>
 #include <linux/device-mapper.h>

-#include "dm.h"
+#include "dm-core.h"
 #include "dm-stats.h"

 #define DM_MSG_PREFIX "stats"

--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -308,6 +308,29 @@ static int stripe_map(struct dm_target *ti, struct bio *bio)
 	return DM_MAPIO_REMAPPED;
 }

+static long stripe_direct_access(struct dm_target *ti, sector_t sector,
+				 void __pmem **kaddr, pfn_t *pfn, long size)
+{
+	struct stripe_c *sc = ti->private;
+	uint32_t stripe;
+	struct block_device *bdev;
+	struct blk_dax_ctl dax = {
+		.size = size,
+	};
+	long ret;
+
+	stripe_map_sector(sc, sector, &stripe, &dax.sector);
+
+	dax.sector += sc->stripe[stripe].physical_start;
+	bdev = sc->stripe[stripe].dev->bdev;
+
+	ret = bdev_direct_access(bdev, &dax);
+	*kaddr = dax.addr;
+	*pfn = dax.pfn;
+
+	return ret;
+}
+
 /*
 * Stripe status:
 *
@@ -416,7 +439,7 @@ static void stripe_io_hints(struct dm_target *ti,

 static struct target_type stripe_target = {
 	.name   = "striped",
-	.version = {1, 5, 1},
+	.version = {1, 6, 0},
 	.module = THIS_MODULE,
 	.ctr    = stripe_ctr,
 	.dtr    = stripe_dtr,
@@ -425,6 +448,7 @@ static struct target_type stripe_target = {
 	.status = stripe_status,
 	.iterate_devices = stripe_iterate_devices,
 	.io_hints = stripe_io_hints,
+	.direct_access = stripe_direct_access,
 };

 int __init dm_stripe_init(void)

--- a/drivers/md/dm-sysfs.c
+++ b/drivers/md/dm-sysfs.c
@@ -6,7 +6,8 @@

 #include <linux/sysfs.h>
 #include <linux/dm-ioctl.h>
-#include "dm.h"
+#include "dm-core.h"
+#include "dm-rq.h"

 struct dm_sysfs_attr {
 	struct attribute attr;

--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -5,7 +5,7 @@
 * This file is released under the GPL.
 */

-#include "dm.h"
+#include "dm-core.h"

 #include <linux/module.h>
 #include <linux/vmalloc.h>
@@ -43,8 +43,10 @@ struct dm_table {
 	struct dm_target *targets;

 	struct target_type *immutable_target_type;
-	unsigned integrity_supported:1;
-	unsigned singleton:1;
+
+	bool integrity_supported:1;
+	bool singleton:1;
+	bool all_blk_mq:1;

 	/*
 	 * Indicates the rw permissions for the new logical
@@ -206,6 +208,7 @@ int dm_table_create(struct dm_table **result, fmode_t mode,
 		return -ENOMEM;
 	}

+	t->type = DM_TYPE_NONE;
 	t->mode = mode;
 	t->md = md;
 	*result = t;
@@ -703,7 +706,7 @@ int dm_table_add_target(struct dm_table *t, const char *type,
 			      dm_device_name(t->md), type);
 			return -EINVAL;
 		}
-		t->singleton = 1;
+		t->singleton = true;
 	}

 	if (dm_target_always_writeable(tgt->type) && !(t->mode & FMODE_WRITE)) {
@@ -824,22 +827,70 @@ void dm_consume_args(struct dm_arg_set *as, unsigned num_args)
 }
 EXPORT_SYMBOL(dm_consume_args);

+static bool __table_type_bio_based(unsigned table_type)
+{
+	return (table_type == DM_TYPE_BIO_BASED ||
+		table_type == DM_TYPE_DAX_BIO_BASED);
+}
+
 static bool __table_type_request_based(unsigned table_type)
 {
 	return (table_type == DM_TYPE_REQUEST_BASED ||
 		table_type == DM_TYPE_MQ_REQUEST_BASED);
 }

-static int dm_table_set_type(struct dm_table *t)
+void dm_table_set_type(struct dm_table *t, unsigned type)
+{
+	t->type = type;
+}
+EXPORT_SYMBOL_GPL(dm_table_set_type);
+
+static int device_supports_dax(struct dm_target *ti, struct dm_dev *dev,
+			       sector_t start, sector_t len, void *data)
+{
+	struct request_queue *q = bdev_get_queue(dev->bdev);
+
+	return q && blk_queue_dax(q);
+}
+
+static bool dm_table_supports_dax(struct dm_table *t)
+{
+	struct dm_target *ti;
+	unsigned i = 0;
+
+	/* Ensure that all targets support DAX. */
+	while (i < dm_table_get_num_targets(t)) {
+		ti = dm_table_get_target(t, i++);
+
+		if (!ti->type->direct_access)
+			return false;
+
+		if (!ti->type->iterate_devices ||
+		    !ti->type->iterate_devices(ti, device_supports_dax, NULL))
+			return false;
+	}
+
+	return true;
+}
+
+static int dm_table_determine_type(struct dm_table *t)
 {
 	unsigned i;
 	unsigned bio_based = 0, request_based = 0, hybrid = 0;
-	bool use_blk_mq = false;
+	bool verify_blk_mq = false;
 	struct dm_target *tgt;
 	struct dm_dev_internal *dd;
-	struct list_head *devices;
+	struct list_head *devices = dm_table_get_devices(t);
 	unsigned live_md_type = dm_get_md_type(t->md);

+	if (t->type != DM_TYPE_NONE) {
+		/* target already set the table's type */
+		if (t->type == DM_TYPE_BIO_BASED)
+			return 0;
+		BUG_ON(t->type == DM_TYPE_DAX_BIO_BASED);
+		goto verify_rq_based;
+	}
+
 	for (i = 0; i < t->num_targets; i++) {
 		tgt = t->targets + i;
 		if (dm_target_hybrid(tgt))
@@ -871,11 +922,27 @@ static int dm_table_set_type(struct dm_table *t)
 	if (bio_based) {
 		/* We must use this table as bio-based */
 		t->type = DM_TYPE_BIO_BASED;
+		if (dm_table_supports_dax(t) ||
+		    (list_empty(devices) && live_md_type == DM_TYPE_DAX_BIO_BASED))
+			t->type = DM_TYPE_DAX_BIO_BASED;
 		return 0;
 	}

 	BUG_ON(!request_based); /* No targets in this table */

+	if (list_empty(devices) && __table_type_request_based(live_md_type)) {
+		/* inherit live MD type */
+		t->type = live_md_type;
+		return 0;
+	}
+
+	/*
+	 * The only way to establish DM_TYPE_MQ_REQUEST_BASED is by
+	 * having a compatible target use dm_table_set_type.
+	 */
+	t->type = DM_TYPE_REQUEST_BASED;
+
+verify_rq_based:
 	/*
 	 * Request-based dm supports only tables that have a single target now.
 	 * To support multiple targets, request splitting support is needed,
@@ -888,7 +955,6 @@ static int dm_table_set_type(struct dm_table *t)
 	}

 	/* Non-request-stackable devices can't be used for request-based dm */
-	devices = dm_table_get_devices(t);
 	list_for_each_entry(dd, devices, list) {
 		struct request_queue *q = bdev_get_queue(dd->dm_dev->bdev);

@@ -899,10 +965,10 @@ static int dm_table_set_type(struct dm_table *t)
 		}

 		if (q->mq_ops)
-			use_blk_mq = true;
+			verify_blk_mq = true;
 	}

-	if (use_blk_mq) {
+	if (verify_blk_mq) {
 		/* verify _all_ devices in the table are blk-mq devices */
 		list_for_each_entry(dd, devices, list)
 			if (!bdev_get_queue(dd->dm_dev->bdev)->mq_ops) {
@@ -910,14 +976,9 @@ static int dm_table_set_type(struct dm_table *t)
 				      " are blk-mq request-stackable");
 				return -EINVAL;
 			}
-		t->type = DM_TYPE_MQ_REQUEST_BASED;
-
-	} else if (list_empty(devices) && __table_type_request_based(live_md_type)) {
-		/* inherit live MD type */
-		t->type = live_md_type;

-	} else
-		t->type = DM_TYPE_REQUEST_BASED;
+		t->all_blk_mq = true;
+	}

 	return 0;
 }
@@ -956,14 +1017,19 @@ struct dm_target *dm_table_get_wildcard_target(struct dm_table *t)
 	return NULL;
 }

+bool dm_table_bio_based(struct dm_table *t)
+{
+	return __table_type_bio_based(dm_table_get_type(t));
+}
+
 bool dm_table_request_based(struct dm_table *t)
 {
 	return __table_type_request_based(dm_table_get_type(t));
 }

-bool dm_table_mq_request_based(struct dm_table *t)
+bool dm_table_all_blk_mq_devices(struct dm_table *t)
 {
-	return dm_table_get_type(t) == DM_TYPE_MQ_REQUEST_BASED;
+	return t->all_blk_mq;
 }

 static int dm_table_alloc_md_mempools(struct dm_table *t, struct mapped_device *md)
@@ -978,7 +1044,7 @@ static int dm_table_alloc_md_mempools(struct dm_table *t, struct mapped_device *
 		return -EINVAL;
 	}

-	if (type == DM_TYPE_BIO_BASED)
+	if (__table_type_bio_based(type))
 		for (i = 0; i < t->num_targets; i++) {
 			tgt = t->targets + i;
 			per_io_data_size = max(per_io_data_size, tgt->per_io_data_size);
@@ -1106,7 +1172,7 @@ static int dm_table_register_integrity(struct dm_table *t)
 		return 0;

 	if (!integrity_profile_exists(dm_disk(md))) {
-		t->integrity_supported = 1;
+		t->integrity_supported = true;
 		/*
 		 * Register integrity profile during table load; we can do
 		 * this because the final profile must match during resume.
@@ -1129,7 +1195,7 @@ static int dm_table_register_integrity(struct dm_table *t)
 	}

 	/* Preserve existing integrity profile */
-	t->integrity_supported = 1;
+	t->integrity_supported = true;
 	return 0;
 }

@@ -1141,9 +1207,9 @@ int dm_table_complete(struct dm_table *t)
 {
 	int r;

-	r = dm_table_set_type(t);
+	r = dm_table_determine_type(t);
 	if (r) {
-		DMERR("unable to set table type");
+		DMERR("unable to determine table type");
 		return r;
 	}


--- a/drivers/md/dm-target.c
+++ b/drivers/md/dm-target.c
@@ -4,7 +4,7 @@
 * This file is released under the GPL.
 */

-#include "dm.h"
+#include "dm-core.h"

 #include <linux/module.h>
 #include <linux/init.h>
@@ -148,9 +148,15 @@ static void io_err_release_clone_rq(struct request *clone)
 {
 }

+static long io_err_direct_access(struct dm_target *ti, sector_t sector,
+				 void __pmem **kaddr, pfn_t *pfn, long size)
+{
+	return -EIO;
+}
+
 static struct target_type error_target = {
 	.name = "error",
-	.version = {1, 4, 0},
+	.version = {1, 5, 0},
 	.features = DM_TARGET_WILDCARD,
 	.ctr  = io_err_ctr,
 	.dtr  = io_err_dtr,
@@ -158,6 +164,7 @@ static struct target_type error_target = {
 	.map_rq = io_err_map_rq,
 	.clone_and_map_rq = io_err_clone_and_map_rq,
 	.release_clone_rq = io_err_release_clone_rq,
+	.direct_access = io_err_direct_access,
 };

 int __init dm_target_init(void)

--- a/drivers/md/dm-thin-metadata.c
+++ b/drivers/md/dm-thin-metadata.c
@@ -1677,6 +1677,36 @@ int dm_pool_block_is_used(struct dm_pool_metadata *pmd, dm_block_t b, bool *resu
 	return r;
 }

+int dm_pool_inc_data_range(struct dm_pool_metadata *pmd, dm_block_t b, dm_block_t e)
+{
+	int r = 0;
+
+	down_write(&pmd->root_lock);
+	for (; b != e; b++) {
+		r = dm_sm_inc_block(pmd->data_sm, b);
+		if (r)
+			break;
+	}
+	up_write(&pmd->root_lock);
+
+	return r;
+}
+
+int dm_pool_dec_data_range(struct dm_pool_metadata *pmd, dm_block_t b, dm_block_t e)
+{
+	int r = 0;
+
+	down_write(&pmd->root_lock);
+	for (; b != e; b++) {
+		r = dm_sm_dec_block(pmd->data_sm, b);
+		if (r)
+			break;
+	}
+	up_write(&pmd->root_lock);
+
+	return r;
+}
+
 bool dm_thin_changed_this_transaction(struct dm_thin_device *td)
 {
 	int r;

--- a/drivers/md/dm-thin-metadata.h
+++ b/drivers/md/dm-thin-metadata.h
@@ -197,6 +197,9 @@ int dm_pool_get_data_dev_size(struct dm_pool_metadata *pmd, dm_block_t *result);

 int dm_pool_block_is_used(struct dm_pool_metadata *pmd, dm_block_t b, bool *result);

+int dm_pool_inc_data_range(struct dm_pool_metadata *pmd, dm_block_t b, dm_block_t e);
+int dm_pool_dec_data_range(struct dm_pool_metadata *pmd, dm_block_t b, dm_block_t e);
+
 /*
 * Returns -ENOSPC if the new size is too small and already allocated
 * blocks would be lost.

--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -253,6 +253,7 @@ struct pool {
 	struct bio_list deferred_flush_bios;
 	struct list_head prepared_mappings;
 	struct list_head prepared_discards;
+	struct list_head prepared_discards_pt2;
 	struct list_head active_thins;

 	struct dm_deferred_set *shared_read_ds;
@@ -269,6 +270,7 @@ struct pool {

 	process_mapping_fn process_prepared_mapping;
 	process_mapping_fn process_prepared_discard;
+	process_mapping_fn process_prepared_discard_pt2;

 	struct dm_bio_prison_cell **cell_sort_array;
 };
@@ -1001,7 +1003,8 @@ static void process_prepared_discard_no_passdown(struct dm_thin_new_mapping *m)

 /*----------------------------------------------------------------*/

-static void passdown_double_checking_shared_status(struct dm_thin_new_mapping *m)
+static void passdown_double_checking_shared_status(struct dm_thin_new_mapping *m,
+						   struct bio *discard_parent)
 {
 	/*
 	 * We've already unmapped this range of blocks, but before we
@@ -1014,7 +1017,7 @@ static void passdown_double_checking_shared_status(struct dm_thin_new_mapping *m
 	dm_block_t b = m->data_block, e, end = m->data_block + m->virt_end - m->virt_begin;
 	struct discard_op op;

-	begin_discard(&op, tc, m->bio);
+	begin_discard(&op, tc, discard_parent);
 	while (b != end) {
 		/* find start of unmapped run */
 		for (; b < end; b++) {
@@ -1049,27 +1052,100 @@ static void passdown_double_checking_shared_status(struct dm_thin_new_mapping *m
 	end_discard(&op, r);
 }

-static void process_prepared_discard_passdown(struct dm_thin_new_mapping *m)
+static void queue_passdown_pt2(struct dm_thin_new_mapping *m)
+{
+	unsigned long flags;
+	struct pool *pool = m->tc->pool;
+
+	spin_lock_irqsave(&pool->lock, flags);
+	list_add_tail(&m->list, &pool->prepared_discards_pt2);
+	spin_unlock_irqrestore(&pool->lock, flags);
+	wake_worker(pool);
+}
+
+static void passdown_endio(struct bio *bio)
+{
+	/*
+	 * It doesn't matter if the passdown discard failed, we still want
+	 * to unmap (we ignore err).
+	 */
+	queue_passdown_pt2(bio->bi_private);
+}
+
+static void process_prepared_discard_passdown_pt1(struct dm_thin_new_mapping *m)
 {
 	int r;
 	struct thin_c *tc = m->tc;
 	struct pool *pool = tc->pool;
+	struct bio *discard_parent;
+	dm_block_t data_end = m->data_block + (m->virt_end - m->virt_begin);

+	/*
+	 * Only this thread allocates blocks, so we can be sure that the
+	 * newly unmapped blocks will not be allocated before the end of
+	 * the function.
+	 */
 	r = dm_thin_remove_range(tc->td, m->virt_begin, m->virt_end);
 	if (r) {
 		metadata_operation_failed(pool, "dm_thin_remove_range", r);
 		bio_io_error(m->bio);
+		cell_defer_no_holder(tc, m->cell);
+		mempool_free(m, pool->mapping_pool);
+		return;
+	}

-	} else if (m->maybe_shared) {
-		passdown_double_checking_shared_status(m);
+	discard_parent = bio_alloc(GFP_NOIO, 1);
+	if (!discard_parent) {
+		DMWARN("%s: unable to allocate top level discard bio for passdown. Skipping passdown.",
+		       dm_device_name(tc->pool->pool_md));
+		queue_passdown_pt2(m);

 	} else {
+		discard_parent->bi_end_io = passdown_endio;
+		discard_parent->bi_private = m;
+
+		if (m->maybe_shared)
+			passdown_double_checking_shared_status(m, discard_parent);
+		else {
 			struct discard_op op;
-		begin_discard(&op, tc, m->bio);
-		r = issue_discard(&op, m->data_block,
-				  m->data_block + (m->virt_end - m->virt_begin));
+
+			begin_discard(&op, tc, discard_parent);
+			r = issue_discard(&op, m->data_block, data_end);
 			end_discard(&op, r);
 		}
+	}
+
+	/*
+	 * Increment the unmapped blocks.  This prevents a race between the
+	 * passdown io and reallocation of freed blocks.
+	 */
+	r = dm_pool_inc_data_range(pool->pmd, m->data_block, data_end);
+	if (r) {
+		metadata_operation_failed(pool, "dm_pool_inc_data_range", r);
+		bio_io_error(m->bio);
+		cell_defer_no_holder(tc, m->cell);
+		mempool_free(m, pool->mapping_pool);
+		return;
+	}
+}
+
+static void process_prepared_discard_passdown_pt2(struct dm_thin_new_mapping *m)
+{
+	int r;
+	struct thin_c *tc = m->tc;
+	struct pool *pool = tc->pool;
+
+	/*
+	 * The passdown has completed, so now we can decrement all those
+	 * unmapped blocks.
+	 */
+	r = dm_pool_dec_data_range(pool->pmd, m->data_block,
+				   m->data_block + (m->virt_end - m->virt_begin));
+	if (r) {
+		metadata_operation_failed(pool, "dm_pool_dec_data_range", r);
+		bio_io_error(m->bio);
+	} else
+		bio_endio(m->bio);

 	cell_defer_no_holder(tc, m->cell);
 	mempool_free(m, pool->mapping_pool);
@@ -2215,6 +2291,8 @@ static void do_worker(struct work_struct *ws)
 	throttle_work_update(&pool->throttle);
 	process_prepared(pool, &pool->prepared_discards, &pool->process_prepared_discard);
 	throttle_work_update(&pool->throttle);
+	process_prepared(pool, &pool->prepared_discards_pt2, &pool->process_prepared_discard_pt2);
+	throttle_work_update(&pool->throttle);
 	process_deferred_bios(pool);
 	throttle_work_complete(&pool->throttle);
 }
@@ -2343,7 +2421,8 @@ static void set_discard_callbacks(struct pool *pool)

 	if (passdown_enabled(pt)) {
 		pool->process_discard_cell = process_discard_cell_passdown;
-		pool->process_prepared_discard = process_prepared_discard_passdown;
+		pool->process_prepared_discard = process_prepared_discard_passdown_pt1;
+		pool->process_prepared_discard_pt2 = process_prepared_discard_passdown_pt2;
 	} else {
 		pool->process_discard_cell = process_discard_cell_no_passdown;
 		pool->process_prepared_discard = process_prepared_discard_no_passdown;
@@ -2830,6 +2909,7 @@ static struct pool *pool_create(struct mapped_device *pool_md,
 	bio_list_init(&pool->deferred_flush_bios);
 	INIT_LIST_HEAD(&pool->prepared_mappings);
 	INIT_LIST_HEAD(&pool->prepared_discards);
+	INIT_LIST_HEAD(&pool->prepared_discards_pt2);
 	INIT_LIST_HEAD(&pool->active_thins);
 	pool->low_water_triggered = false;
 	pool->suspended = true;

--- a/drivers/md/dm-verity-fec.c
+++ b/drivers/md/dm-verity-fec.c
@@ -453,9 +453,7 @@ int verity_fec_decode(struct dm_verity *v, struct dm_verity_io *io,
 	 */

 	offset = block << v->data_dev_block_bits;
-
-	res = offset;
-	div64_u64(res, v->fec->rounds << v->data_dev_block_bits);
+	res = div64_u64(offset, v->fec->rounds << v->data_dev_block_bits);

 	/*
 	 * The base RS block we can feed to the interleaver to find out all

--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -13,6 +13,7 @@
 #include <linux/fs.h>
 #include <linux/device-mapper.h>
 #include <linux/list.h>
+#include <linux/moduleparam.h>
 #include <linux/blkdev.h>
 #include <linux/backing-dev.h>
 #include <linux/hdreg.h>
@@ -32,14 +33,6 @@
 */
 #define DM_STATUS_NOFLUSH_FLAG		(1 << 0)

-/*
- * Type of table and mapped_device's mempool
- */
-#define DM_TYPE_NONE			0
-#define DM_TYPE_BIO_BASED		1
-#define DM_TYPE_REQUEST_BASED		2
-#define DM_TYPE_MQ_REQUEST_BASED	3
-
 /*
 * List of devices that a metadevice uses and should open/close.
 */
@@ -75,8 +68,9 @@ unsigned dm_table_get_type(struct dm_table *t);
 struct target_type *dm_table_get_immutable_target_type(struct dm_table *t);
 struct dm_target *dm_table_get_immutable_target(struct dm_table *t);
 struct dm_target *dm_table_get_wildcard_target(struct dm_table *t);
+bool dm_table_bio_based(struct dm_table *t);
 bool dm_table_request_based(struct dm_table *t);
-bool dm_table_mq_request_based(struct dm_table *t);
+bool dm_table_all_blk_mq_devices(struct dm_table *t);
 void dm_table_free_md_mempools(struct dm_table *t);
 struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t);

@@ -161,16 +155,6 @@ void dm_interface_exit(void);
 /*
 * sysfs interface
 */
-struct dm_kobject_holder {
-	struct kobject kobj;
-	struct completion completion;
-};
-
-static inline struct completion *dm_get_completion_from_kobject(struct kobject *kobj)
-{
-	return &container_of(kobj, struct dm_kobject_holder, kobj)->completion;
-}
-
 int dm_sysfs_init(struct mapped_device *md);
 void dm_sysfs_exit(struct mapped_device *md);
 struct kobject *dm_kobject(struct mapped_device *md);
@@ -212,8 +196,6 @@ int dm_kobject_uevent(struct mapped_device *md, enum kobject_action action,
 void dm_internal_suspend(struct mapped_device *md);
 void dm_internal_resume(struct mapped_device *md);

-bool dm_use_blk_mq(struct mapped_device *md);
-
 int dm_io_init(void);
 void dm_io_exit(void);

@@ -228,18 +210,8 @@ struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, unsigned t
 void dm_free_md_mempools(struct dm_md_mempools *pools);

 /*
- * Helpers that are used by DM core
+ * Various helpers
 */
 unsigned dm_get_reserved_bio_based_ios(void);
-unsigned dm_get_reserved_rq_based_ios(void);
-
-static inline bool dm_message_test_buffer_overflow(char *result, unsigned maxlen)
-{
-	return !maxlen || strlen(result) + 1 >= maxlen;
-}
-
-ssize_t dm_attr_rq_based_seq_io_merge_deadline_show(struct mapped_device *md, char *buf);
-ssize_t dm_attr_rq_based_seq_io_merge_deadline_store(struct mapped_device *md,
-						     const char *buf, size_t count);

 #endif
--- a/drivers/md/persistent-data/dm-btree.c
+++ b/drivers/md/persistent-data/dm-btree.c
@@ -429,7 +429,14 @@ static int dm_btree_lookup_next_single(struct dm_btree_info *info, dm_block_t ro

 	if (flags & INTERNAL_NODE) {
 		i = lower_bound(n, key);
-		if (i < 0 || i >= nr_entries) {
+		if (i < 0) {
+			/*
+			 * avoid early -ENODATA return when all entries are
+			 * higher than the search @key.
+			 */
+			i = 0;
+		}
+		if (i >= nr_entries) {
 			r = -ENODATA;
 			goto out;
 		}

--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -1619,8 +1619,7 @@ static int sd_pr_register(struct block_device *bdev, u64 old_key, u64 new_key,
 		return -EOPNOTSUPP;
 	return sd_pr_command(bdev, (flags & PR_FL_IGNORE_KEY) ? 0x06 : 0x00,
 			old_key, new_key, 0,
-			(1 << 0) /* APTPL */ |
-			(1 << 2) /* ALL_TG_PT */);
+			(1 << 0) /* APTPL */);
 }

 static int sd_pr_reserve(struct block_device *bdev, u64 key, enum pr_type type,

--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -19,6 +19,15 @@ struct dm_table;
 struct mapped_device;
 struct bio_vec;

+/*
+ * Type of table, mapped_device's mempool and request_queue
+ */
+#define DM_TYPE_NONE			0
+#define DM_TYPE_BIO_BASED		1
+#define DM_TYPE_REQUEST_BASED		2
+#define DM_TYPE_MQ_REQUEST_BASED	3
+#define DM_TYPE_DAX_BIO_BASED		4
+
 typedef enum { STATUSTYPE_INFO, STATUSTYPE_TABLE } status_type_t;

 union map_info {
@@ -116,6 +125,14 @@ typedef void (*dm_io_hints_fn) (struct dm_target *ti,
 */
 typedef int (*dm_busy_fn) (struct dm_target *ti);

+/*
+ * Returns:
+ *  < 0 : error
+ * >= 0 : the number of bytes accessible at the address
+ */
+typedef long (*dm_direct_access_fn) (struct dm_target *ti, sector_t sector,
+				     void __pmem **kaddr, pfn_t *pfn, long size);
+
 void dm_error(const char *message);

 struct dm_dev {
@@ -162,6 +179,7 @@ struct target_type {
 	dm_busy_fn busy;
 	dm_iterate_devices_fn iterate_devices;
 	dm_io_hints_fn io_hints;
+	dm_direct_access_fn direct_access;

 	/* For internal device-mapper use. */
 	struct list_head list;
@@ -443,6 +461,14 @@ int dm_table_add_target(struct dm_table *t, const char *type,
 */
 void dm_table_add_target_callbacks(struct dm_table *t, struct dm_target_callbacks *cb);

+/*
+ * Target can use this to set the table's type.
+ * Can only ever be called from a target's ctr.
+ * Useful for "hybrid" target (supports both bio-based
+ * and request-based).
+ */
+void dm_table_set_type(struct dm_table *t, unsigned type);
+
 /*
 * Finally call this to make the table ready for use.
 */

--- a/include/uapi/linux/dm-ioctl.h
+++ b/include/uapi/linux/dm-ioctl.h
@@ -267,9 +267,9 @@ enum {
 #define DM_DEV_SET_GEOMETRY	_IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl)

 #define DM_VERSION_MAJOR	4
-#define DM_VERSION_MINOR	34
+#define DM_VERSION_MINOR	35
 #define DM_VERSION_PATCHLEVEL	0
-#define DM_VERSION_EXTRA	"-ioctl (2015-10-28)"
+#define DM_VERSION_EXTRA	"-ioctl (2016-06-23)"

 /* Status bits */
 #define DM_READONLY_FLAG	(1 << 0) /* In/Out */