Commit bd5c4031 authored by Hannes Reinecke, committed by Mike Snitzer

dm zoned: metadata version 2

Implement handling for metadata version 2. The new metadata adds a
label and UUID for the device mapper device, and additional UUID for
the underlying block devices.

It also allows for an additional regular drive to be used for
emulating random access zones. The emulated zones will be placed
logically in front of the zones from the zoned block device, causing
the superblocks and metadata to be stored on that device.

The first zone of the original zoned device will be used to hold
another, tertiary copy of the metadata; this copy carries a generation
number of 0 and is never updated; it's just used for identification.
Signed-off-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Bob Liu <bob.liu@oracle.com>
Reviewed-by: Damien Le Moal <damien.lemoal@wdc.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
parent dc076c83
...@@ -37,9 +37,13 @@ Algorithm ...@@ -37,9 +37,13 @@ Algorithm
dm-zoned implements an on-disk buffering scheme to handle non-sequential dm-zoned implements an on-disk buffering scheme to handle non-sequential
write accesses to the sequential zones of a zoned block device. write accesses to the sequential zones of a zoned block device.
Conventional zones are used for caching as well as for storing internal Conventional zones are used for caching as well as for storing internal
metadata. metadata. It can also use a regular block device together with the zoned
block device; in that case the regular block device will be split logically
in zones with the same size as the zoned block device. These zones will be
placed in front of the zones from the zoned block device and will be handled
just like conventional zones.
The zones of the device are separated into 2 types: The zones of the device(s) are separated into 2 types:
1) Metadata zones: these are conventional zones used to store metadata. 1) Metadata zones: these are conventional zones used to store metadata.
Metadata zones are not reported as useable capacity to the user. Metadata zones are not reported as useable capacity to the user.
...@@ -127,6 +131,13 @@ resumed. Flushing metadata thus only temporarily delays write and ...@@ -127,6 +131,13 @@ resumed. Flushing metadata thus only temporarily delays write and
discard requests. Read requests can be processed concurrently while discard requests. Read requests can be processed concurrently while
metadata flush is being executed. metadata flush is being executed.
If a regular device is used in conjunction with the zoned block device,
a third set of metadata (without the zone bitmaps) is written to the
start of the zoned block device. This metadata has a generation counter of
'0' and will never be updated during normal operation; it just serves for
identification purposes. The first and second copy of the metadata
are located at the start of the regular block device.
Usage Usage
===== =====
...@@ -138,12 +149,21 @@ Ex:: ...@@ -138,12 +149,21 @@ Ex::
dmzadm --format /dev/sdxx dmzadm --format /dev/sdxx
For a formatted device, the target can be created normally with the
dmsetup utility. The only parameter that dm-zoned requires is the
underlying zoned block device name. Ex::
echo "0 `blockdev --getsize ${dev}` zoned ${dev}" | \ If two drives are to be used, both devices must be specified, with the
dmsetup create dmz-`basename ${dev}` regular block device as the first device.
Ex::
dmzadm --format /dev/sdxx /dev/sdyy
Formatted device(s) can be started with the dmzadm utility, too.
Ex::
dmzadm --start /dev/sdxx /dev/sdyy
Information about the internal layout and current usage of the zones can Information about the internal layout and current usage of the zones can
be obtained with the 'status' callback from dmsetup: be obtained with the 'status' callback from dmsetup:
......
This diff is collapsed.
...@@ -13,6 +13,8 @@ ...@@ -13,6 +13,8 @@
#define DMZ_MIN_BIOS 8192 #define DMZ_MIN_BIOS 8192
#define DMZ_MAX_DEVS 2
/* /*
* Zone BIO context. * Zone BIO context.
*/ */
...@@ -38,7 +40,7 @@ struct dm_chunk_work { ...@@ -38,7 +40,7 @@ struct dm_chunk_work {
* Target descriptor. * Target descriptor.
*/ */
struct dmz_target { struct dmz_target {
struct dm_dev *ddev; struct dm_dev *ddev[DMZ_MAX_DEVS];
unsigned long flags; unsigned long flags;
...@@ -81,7 +83,7 @@ static inline void dmz_bio_endio(struct bio *bio, blk_status_t status) ...@@ -81,7 +83,7 @@ static inline void dmz_bio_endio(struct bio *bio, blk_status_t status)
if (status != BLK_STS_OK && bio->bi_status == BLK_STS_OK) if (status != BLK_STS_OK && bio->bi_status == BLK_STS_OK)
bio->bi_status = status; bio->bi_status = status;
if (bio->bi_status != BLK_STS_OK) if (bioctx->dev && bio->bi_status != BLK_STS_OK)
bioctx->dev->flags |= DMZ_CHECK_BDEV; bioctx->dev->flags |= DMZ_CHECK_BDEV;
if (refcount_dec_and_test(&bioctx->ref)) { if (refcount_dec_and_test(&bioctx->ref)) {
...@@ -690,60 +692,64 @@ static int dmz_map(struct dm_target *ti, struct bio *bio) ...@@ -690,60 +692,64 @@ static int dmz_map(struct dm_target *ti, struct bio *bio)
/* /*
* Get zoned device information. * Get zoned device information.
*/ */
static int dmz_get_zoned_device(struct dm_target *ti, char *path) static int dmz_get_zoned_device(struct dm_target *ti, char *path,
int idx, int nr_devs)
{ {
struct dmz_target *dmz = ti->private; struct dmz_target *dmz = ti->private;
struct request_queue *q; struct dm_dev *ddev;
struct dmz_dev *dev; struct dmz_dev *dev;
sector_t aligned_capacity;
int ret; int ret;
struct block_device *bdev;
/* Get the target device */ /* Get the target device */
ret = dm_get_device(ti, path, dm_table_get_mode(ti->table), &dmz->ddev); ret = dm_get_device(ti, path, dm_table_get_mode(ti->table), &ddev);
if (ret) { if (ret) {
ti->error = "Get target device failed"; ti->error = "Get target device failed";
dmz->ddev = NULL;
return ret; return ret;
} }
dev = kzalloc(sizeof(struct dmz_dev), GFP_KERNEL); bdev = ddev->bdev;
if (!dev) { if (bdev_zoned_model(bdev) == BLK_ZONED_NONE) {
ret = -ENOMEM; if (nr_devs == 1) {
ti->error = "Invalid regular device";
goto err; goto err;
} }
if (idx != 0) {
dev->bdev = dmz->ddev->bdev; ti->error = "First device must be a regular device";
(void)bdevname(dev->bdev, dev->name);
if (bdev_zoned_model(dev->bdev) == BLK_ZONED_NONE) {
ti->error = "Not a zoned block device";
ret = -EINVAL;
goto err; goto err;
} }
if (dmz->ddev[0]) {
q = bdev_get_queue(dev->bdev); ti->error = "Too many regular devices";
dev->capacity = i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT;
aligned_capacity = dev->capacity &
~((sector_t)blk_queue_zone_sectors(q) - 1);
if (ti->begin ||
((ti->len != dev->capacity) && (ti->len != aligned_capacity))) {
ti->error = "Partial mapping not supported";
ret = -EINVAL;
goto err; goto err;
} }
dev = &dmz->dev[idx];
dev->flags = DMZ_BDEV_REGULAR;
} else {
if (dmz->ddev[idx]) {
ti->error = "Too many zoned devices";
goto err;
}
if (nr_devs > 1 && idx == 0) {
ti->error = "First device must be a regular device";
goto err;
}
dev = &dmz->dev[idx];
}
dev->bdev = bdev;
(void)bdevname(dev->bdev, dev->name);
dev->zone_nr_sectors = blk_queue_zone_sectors(q); dev->capacity = i_size_read(bdev->bd_inode) >> SECTOR_SHIFT;
if (ti->begin) {
dev->nr_zones = blkdev_nr_zones(dev->bdev->bd_disk); ti->error = "Partial mapping is not supported";
goto err;
}
dmz->dev = dev; dmz->ddev[idx] = ddev;
return 0; return 0;
err: err:
dm_put_device(ti, dmz->ddev); dm_put_device(ti, ddev);
kfree(dev); return -EINVAL;
return ret;
} }
/* /*
...@@ -752,10 +758,56 @@ static int dmz_get_zoned_device(struct dm_target *ti, char *path) ...@@ -752,10 +758,56 @@ static int dmz_get_zoned_device(struct dm_target *ti, char *path)
static void dmz_put_zoned_device(struct dm_target *ti) static void dmz_put_zoned_device(struct dm_target *ti)
{ {
struct dmz_target *dmz = ti->private; struct dmz_target *dmz = ti->private;
int i;
dm_put_device(ti, dmz->ddev); for (i = 0; i < DMZ_MAX_DEVS; i++) {
kfree(dmz->dev); if (dmz->ddev[i]) {
dmz->dev = NULL; dm_put_device(ti, dmz->ddev[i]);
dmz->ddev[i] = NULL;
}
}
}
static int dmz_fixup_devices(struct dm_target *ti)
{
struct dmz_target *dmz = ti->private;
struct dmz_dev *reg_dev, *zoned_dev;
struct request_queue *q;
/*
* When we have two devices, the first one must be a regular block
* device and the second a zoned block device.
*/
if (dmz->ddev[0] && dmz->ddev[1]) {
reg_dev = &dmz->dev[0];
if (!(reg_dev->flags & DMZ_BDEV_REGULAR)) {
ti->error = "Primary disk is not a regular device";
return -EINVAL;
}
zoned_dev = &dmz->dev[1];
if (zoned_dev->flags & DMZ_BDEV_REGULAR) {
ti->error = "Secondary disk is not a zoned device";
return -EINVAL;
}
} else {
reg_dev = NULL;
zoned_dev = &dmz->dev[0];
if (zoned_dev->flags & DMZ_BDEV_REGULAR) {
ti->error = "Disk is not a zoned device";
return -EINVAL;
}
}
q = bdev_get_queue(zoned_dev->bdev);
zoned_dev->zone_nr_sectors = blk_queue_zone_sectors(q);
zoned_dev->nr_zones = blkdev_nr_zones(zoned_dev->bdev->bd_disk);
if (reg_dev) {
reg_dev->zone_nr_sectors = zoned_dev->zone_nr_sectors;
reg_dev->nr_zones = DIV_ROUND_UP(reg_dev->capacity,
reg_dev->zone_nr_sectors);
zoned_dev->zone_offset = reg_dev->nr_zones;
}
return 0;
} }
/* /*
...@@ -764,11 +816,10 @@ static void dmz_put_zoned_device(struct dm_target *ti) ...@@ -764,11 +816,10 @@ static void dmz_put_zoned_device(struct dm_target *ti)
static int dmz_ctr(struct dm_target *ti, unsigned int argc, char **argv) static int dmz_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{ {
struct dmz_target *dmz; struct dmz_target *dmz;
struct dmz_dev *dev;
int ret; int ret;
/* Check arguments */ /* Check arguments */
if (argc != 1) { if (argc < 1 || argc > 2) {
ti->error = "Invalid argument count"; ti->error = "Invalid argument count";
return -EINVAL; return -EINVAL;
} }
...@@ -779,18 +830,34 @@ static int dmz_ctr(struct dm_target *ti, unsigned int argc, char **argv) ...@@ -779,18 +830,34 @@ static int dmz_ctr(struct dm_target *ti, unsigned int argc, char **argv)
ti->error = "Unable to allocate the zoned target descriptor"; ti->error = "Unable to allocate the zoned target descriptor";
return -ENOMEM; return -ENOMEM;
} }
dmz->dev = kcalloc(2, sizeof(struct dmz_dev), GFP_KERNEL);
if (!dmz->dev) {
ti->error = "Unable to allocate the zoned device descriptors";
kfree(dmz);
return -ENOMEM;
}
ti->private = dmz; ti->private = dmz;
/* Get the target zoned block device */ /* Get the target zoned block device */
ret = dmz_get_zoned_device(ti, argv[0]); ret = dmz_get_zoned_device(ti, argv[0], 0, argc);
if (ret)
goto err;
if (argc == 2) {
ret = dmz_get_zoned_device(ti, argv[1], 1, argc);
if (ret) {
dmz_put_zoned_device(ti);
goto err;
}
}
ret = dmz_fixup_devices(ti);
if (ret) { if (ret) {
dmz->ddev = NULL; dmz_put_zoned_device(ti);
goto err; goto err;
} }
/* Initialize metadata */ /* Initialize metadata */
dev = dmz->dev; ret = dmz_ctr_metadata(dmz->dev, argc, &dmz->metadata,
ret = dmz_ctr_metadata(dev, &dmz->metadata,
dm_table_device_name(ti->table)); dm_table_device_name(ti->table));
if (ret) { if (ret) {
ti->error = "Metadata initialization failed"; ti->error = "Metadata initialization failed";
...@@ -867,6 +934,7 @@ static int dmz_ctr(struct dm_target *ti, unsigned int argc, char **argv) ...@@ -867,6 +934,7 @@ static int dmz_ctr(struct dm_target *ti, unsigned int argc, char **argv)
err_dev: err_dev:
dmz_put_zoned_device(ti); dmz_put_zoned_device(ti);
err: err:
kfree(dmz->dev);
kfree(dmz); kfree(dmz);
return ret; return ret;
...@@ -897,6 +965,7 @@ static void dmz_dtr(struct dm_target *ti) ...@@ -897,6 +965,7 @@ static void dmz_dtr(struct dm_target *ti)
mutex_destroy(&dmz->chunk_lock); mutex_destroy(&dmz->chunk_lock);
kfree(dmz->dev);
kfree(dmz); kfree(dmz);
} }
...@@ -971,10 +1040,17 @@ static int dmz_iterate_devices(struct dm_target *ti, ...@@ -971,10 +1040,17 @@ static int dmz_iterate_devices(struct dm_target *ti,
iterate_devices_callout_fn fn, void *data) iterate_devices_callout_fn fn, void *data)
{ {
struct dmz_target *dmz = ti->private; struct dmz_target *dmz = ti->private;
struct dmz_dev *dev = dmz->dev; unsigned int zone_nr_sectors = dmz_zone_nr_sectors(dmz->metadata);
sector_t capacity = dev->capacity & ~(dmz_zone_nr_sectors(dmz->metadata) - 1); sector_t capacity;
int r;
return fn(ti, dmz->ddev, 0, capacity, data);
capacity = dmz->dev[0].capacity & ~(zone_nr_sectors - 1);
r = fn(ti, dmz->ddev[0], 0, capacity, data);
if (!r && dmz->ddev[1]) {
capacity = dmz->dev[1].capacity & ~(zone_nr_sectors - 1);
r = fn(ti, dmz->ddev[1], 0, capacity, data);
}
return r;
} }
static void dmz_status(struct dm_target *ti, status_type_t type, static void dmz_status(struct dm_target *ti, status_type_t type,
...@@ -984,6 +1060,7 @@ static void dmz_status(struct dm_target *ti, status_type_t type, ...@@ -984,6 +1060,7 @@ static void dmz_status(struct dm_target *ti, status_type_t type,
struct dmz_target *dmz = ti->private; struct dmz_target *dmz = ti->private;
ssize_t sz = 0; ssize_t sz = 0;
char buf[BDEVNAME_SIZE]; char buf[BDEVNAME_SIZE];
struct dmz_dev *dev;
switch (type) { switch (type) {
case STATUSTYPE_INFO: case STATUSTYPE_INFO:
...@@ -995,8 +1072,14 @@ static void dmz_status(struct dm_target *ti, status_type_t type, ...@@ -995,8 +1072,14 @@ static void dmz_status(struct dm_target *ti, status_type_t type,
dmz_nr_seq_zones(dmz->metadata)); dmz_nr_seq_zones(dmz->metadata));
break; break;
case STATUSTYPE_TABLE: case STATUSTYPE_TABLE:
format_dev_t(buf, dmz->dev->bdev->bd_dev); dev = &dmz->dev[0];
format_dev_t(buf, dev->bdev->bd_dev);
DMEMIT("%s", buf); DMEMIT("%s", buf);
if (dmz->dev[1].bdev) {
dev = &dmz->dev[1];
format_dev_t(buf, dev->bdev->bd_dev);
DMEMIT(" %s", buf);
}
break; break;
} }
return; return;
...@@ -1018,7 +1101,7 @@ static int dmz_message(struct dm_target *ti, unsigned int argc, char **argv, ...@@ -1018,7 +1101,7 @@ static int dmz_message(struct dm_target *ti, unsigned int argc, char **argv,
static struct target_type dmz_type = { static struct target_type dmz_type = {
.name = "zoned", .name = "zoned",
.version = {1, 1, 0}, .version = {2, 0, 0},
.features = DM_TARGET_SINGLETON | DM_TARGET_ZONED_HM, .features = DM_TARGET_SINGLETON | DM_TARGET_ZONED_HM,
.module = THIS_MODULE, .module = THIS_MODULE,
.ctr = dmz_ctr, .ctr = dmz_ctr,
......
...@@ -52,10 +52,12 @@ struct dmz_dev { ...@@ -52,10 +52,12 @@ struct dmz_dev {
struct block_device *bdev; struct block_device *bdev;
char name[BDEVNAME_SIZE]; char name[BDEVNAME_SIZE];
uuid_t uuid;
sector_t capacity; sector_t capacity;
unsigned int nr_zones; unsigned int nr_zones;
unsigned int zone_offset;
unsigned int flags; unsigned int flags;
...@@ -69,6 +71,7 @@ struct dmz_dev { ...@@ -69,6 +71,7 @@ struct dmz_dev {
/* Device flags. */ /* Device flags. */
#define DMZ_BDEV_DYING (1 << 0) #define DMZ_BDEV_DYING (1 << 0)
#define DMZ_CHECK_BDEV (2 << 0) #define DMZ_CHECK_BDEV (2 << 0)
#define DMZ_BDEV_REGULAR (4 << 0)
/* /*
* Zone descriptor. * Zone descriptor.
...@@ -163,8 +166,8 @@ struct dmz_reclaim; ...@@ -163,8 +166,8 @@ struct dmz_reclaim;
/* /*
* Functions defined in dm-zoned-metadata.c * Functions defined in dm-zoned-metadata.c
*/ */
int dmz_ctr_metadata(struct dmz_dev *dev, struct dmz_metadata **zmd, int dmz_ctr_metadata(struct dmz_dev *dev, int num_dev,
const char *devname); struct dmz_metadata **zmd, const char *devname);
void dmz_dtr_metadata(struct dmz_metadata *zmd); void dmz_dtr_metadata(struct dmz_metadata *zmd);
int dmz_resume_metadata(struct dmz_metadata *zmd); int dmz_resume_metadata(struct dmz_metadata *zmd);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment