Commit 912afc36 authored by Linus Torvalds

Merge tag 'dm-3.5-changes-1' of git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-dm

Pull device-mapper updates from Alasdair G Kergon:
 "Improve multipath's retrying mechanism in some defined circumstances
  and provide a simple reserve/release mechanism for userspace tools to
  access thin provisioning metadata while the pool is in use."

* tag 'dm-3.5-changes-1' of git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-dm:
  dm thin: provide userspace access to pool metadata
  dm thin: use slab mempools
  dm mpath: allow ioctls to trigger pg init
  dm mpath: delay retry of bypassed pg
  dm mpath: reduce size of struct multipath
parents 4fc3acf2 cc8394d8
......@@ -287,6 +287,17 @@ iii) Messages
the current transaction id is when you change it with this
compare-and-swap message.
reserve_metadata_snap
Reserve a copy of the data mapping btree for use by userland.
This allows userland to inspect the mappings as they were when
this message was executed. Use the pool's status command to
get the root block associated with the metadata snapshot.
release_metadata_snap
Release a previously reserved copy of the data mapping btree.
'thin' target
-------------
......
......@@ -18,6 +18,7 @@
#include <linux/slab.h>
#include <linux/time.h>
#include <linux/workqueue.h>
#include <linux/delay.h>
#include <scsi/scsi_dh.h>
#include <linux/atomic.h>
......@@ -61,11 +62,11 @@ struct multipath {
struct list_head list;
struct dm_target *ti;
spinlock_t lock;
const char *hw_handler_name;
char *hw_handler_params;
spinlock_t lock;
unsigned nr_priority_groups;
struct list_head priority_groups;
......@@ -81,16 +82,17 @@ struct multipath {
struct priority_group *next_pg; /* Switch to this PG if set */
unsigned repeat_count; /* I/Os left before calling PS again */
unsigned queue_io; /* Must we queue all I/O? */
unsigned queue_if_no_path; /* Queue I/O if last path fails? */
unsigned saved_queue_if_no_path;/* Saved state during suspension */
unsigned queue_io:1; /* Must we queue all I/O? */
unsigned queue_if_no_path:1; /* Queue I/O if last path fails? */
unsigned saved_queue_if_no_path:1; /* Saved state during suspension */
unsigned pg_init_retries; /* Number of times to retry pg_init */
unsigned pg_init_count; /* Number of times pg_init called */
unsigned pg_init_delay_msecs; /* Number of msecs before pg_init retry */
unsigned queue_size;
struct work_struct process_queued_ios;
struct list_head queued_ios;
unsigned queue_size;
struct work_struct trigger_event;
......@@ -328,14 +330,18 @@ static void __choose_pgpath(struct multipath *m, size_t nr_bytes)
/*
* Loop through priority groups until we find a valid path.
* First time we skip PGs marked 'bypassed'.
* Second time we only try the ones we skipped.
* Second time we only try the ones we skipped, but set
* pg_init_delay_retry so we do not hammer controllers.
*/
do {
list_for_each_entry(pg, &m->priority_groups, list) {
if (pg->bypassed == bypassed)
continue;
if (!__choose_path_in_pg(m, pg, nr_bytes))
if (!__choose_path_in_pg(m, pg, nr_bytes)) {
if (!bypassed)
m->pg_init_delay_retry = 1;
return;
}
}
} while (bypassed--);
......@@ -481,9 +487,6 @@ static void process_queued_ios(struct work_struct *work)
spin_lock_irqsave(&m->lock, flags);
if (!m->queue_size)
goto out;
if (!m->current_pgpath)
__choose_pgpath(m, 0);
......@@ -496,7 +499,6 @@ static void process_queued_ios(struct work_struct *work)
if (m->pg_init_required && !m->pg_init_in_progress && pgpath)
__pg_init_all_paths(m);
out:
spin_unlock_irqrestore(&m->lock, flags);
if (!must_queue)
dispatch_queued_ios(m);
......@@ -1517,11 +1519,16 @@ static int multipath_message(struct dm_target *ti, unsigned argc, char **argv)
static int multipath_ioctl(struct dm_target *ti, unsigned int cmd,
unsigned long arg)
{
struct multipath *m = (struct multipath *) ti->private;
struct block_device *bdev = NULL;
fmode_t mode = 0;
struct multipath *m = ti->private;
struct block_device *bdev;
fmode_t mode;
unsigned long flags;
int r = 0;
int r;
again:
bdev = NULL;
mode = 0;
r = 0;
spin_lock_irqsave(&m->lock, flags);
......@@ -1546,6 +1553,12 @@ static int multipath_ioctl(struct dm_target *ti, unsigned int cmd,
if (!r && ti->len != i_size_read(bdev->bd_inode) >> SECTOR_SHIFT)
r = scsi_verify_blk_ioctl(NULL, cmd);
if (r == -EAGAIN && !fatal_signal_pending(current)) {
queue_work(kmultipathd, &m->process_queued_ios);
msleep(10);
goto again;
}
return r ? : __blkdev_driver_ioctl(bdev, mode, cmd, arg);
}
......@@ -1643,7 +1656,7 @@ static int multipath_busy(struct dm_target *ti)
*---------------------------------------------------------------*/
static struct target_type multipath_target = {
.name = "multipath",
.version = {1, 3, 0},
.version = {1, 4, 0},
.module = THIS_MODULE,
.ctr = multipath_ctr,
.dtr = multipath_dtr,
......
......@@ -1082,31 +1082,155 @@ int dm_pool_get_metadata_transaction_id(struct dm_pool_metadata *pmd,
return 0;
}
static int __get_held_metadata_root(struct dm_pool_metadata *pmd,
dm_block_t *result)
/*
 * Reserve a metadata snapshot: shadow the superblock, keep the copy and
 * record its location in the live superblock's held_root field.
 *
 * The wrapper dm_pool_reserve_metadata_snap() calls this with
 * pmd->root_lock held for writing.
 *
 * Returns 0 on success, -EBUSY if the superblock already records a
 * held root, or the error from dm_tm_shadow_block() /
 * dm_bm_write_lock().
 */
static int __reserve_metadata_snap(struct dm_pool_metadata *pmd)
{
int r, inc;
struct thin_disk_superblock *disk_super;
struct dm_block *copy, *sblock;
dm_block_t held_root;
/*
* Copy the superblock.
*
* The reference count of the superblock is bumped first and the
* block is then shadowed.
* NOTE(review): the return value of dm_sm_inc_block() is not checked,
* and the increment is not undone if the shadow operation fails.
*/
dm_sm_inc_block(pmd->metadata_sm, THIN_SUPERBLOCK_LOCATION);
r = dm_tm_shadow_block(pmd->tm, THIN_SUPERBLOCK_LOCATION,
&sb_validator, &copy, &inc);
if (r)
return r;
/*
* NOTE(review): presumably the extra reference taken above guarantees
* that the shadow operation reports 'inc' - confirm against
* dm_tm_shadow_block().
*/
BUG_ON(!inc);
/* The location of the copy is what gets published as the held root. */
held_root = dm_block_location(copy);
disk_super = dm_block_data(copy);
/* A non-zero held_root in the copy means a snapshot is already held. */
if (le64_to_cpu(disk_super->held_root)) {
DMWARN("Pool metadata snapshot already exists: release this before taking another.");
/* Drop the copy that was just made and bail out. */
dm_tm_dec(pmd->tm, held_root);
dm_tm_unlock(pmd->tm, copy);
pmd->need_commit = 1;
return -EBUSY;
}
/*
* Wipe the spacemap since we're not publishing this.
*/
memset(&disk_super->data_space_map_root, 0,
sizeof(disk_super->data_space_map_root));
memset(&disk_super->metadata_space_map_root, 0,
sizeof(disk_super->metadata_space_map_root));
/*
* Increment the data structures that need to be preserved.
*/
dm_tm_inc(pmd->tm, le64_to_cpu(disk_super->data_mapping_root));
dm_tm_inc(pmd->tm, le64_to_cpu(disk_super->device_details_root));
dm_tm_unlock(pmd->tm, copy);
/*
* Write the held root into the superblock.
*/
r = dm_bm_write_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION,
&sb_validator, &sblock);
if (r) {
/*
* Could not lock the live superblock: drop the reference on the
* copy.
* NOTE(review): the two dm_tm_inc() calls above are not reverted
* on this path.
*/
dm_tm_dec(pmd->tm, held_root);
pmd->need_commit = 1;
return r;
}
disk_super = dm_block_data(sblock);
disk_super->held_root = cpu_to_le64(held_root);
dm_bm_unlock(sblock);
/* Flag the metadata as needing a commit. */
pmd->need_commit = 1;
return 0;
}
/*
 * Reserve a metadata snapshot for the pool.
 *
 * Holds pmd->root_lock for writing around __reserve_metadata_snap() and
 * returns that function's result unchanged.
 */
int dm_pool_reserve_metadata_snap(struct dm_pool_metadata *pmd)
{
	int err;

	down_write(&pmd->root_lock);
	err = __reserve_metadata_snap(pmd);
	up_write(&pmd->root_lock);

	return err;
}
/*
 * Release a previously reserved metadata snapshot.
 *
 * Clears held_root in the live superblock, then drops the references
 * held on the snapshot's data mapping root, device details root and on
 * the snapshot block itself.
 *
 * The wrapper dm_pool_release_metadata_snap() calls this with
 * pmd->root_lock held for writing.
 *
 * Returns -EINVAL if no snapshot is recorded, the error from
 * dm_bm_write_lock() / dm_tm_read_lock() if either fails, otherwise
 * the result of dm_tm_unlock() on the snapshot block.
 */
static int __release_metadata_snap(struct dm_pool_metadata *pmd)
{
	int r;
	struct thin_disk_superblock *disk_super;
	struct dm_block *sblock, *copy;
	dm_block_t held_root;

	r = dm_bm_write_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION,
			     &sb_validator, &sblock);
	if (r)
		return r;

	/*
	 * Remember the held root and clear it in the superblock.  This is
	 * done (and need_commit set) before checking whether a snapshot
	 * actually existed.
	 */
	disk_super = dm_block_data(sblock);
	held_root = le64_to_cpu(disk_super->held_root);
	disk_super->held_root = cpu_to_le64(0);
	pmd->need_commit = 1;

	dm_bm_unlock(sblock);

	if (!held_root) {
		DMWARN("No pool metadata snapshot found: nothing to release.");
		return -EINVAL;
	}

	/* Read the snapshot superblock to find the roots it references. */
	r = dm_tm_read_lock(pmd->tm, held_root, &sb_validator, &copy);
	if (r)
		return r;

	/*
	 * Drop the references on both roots and on the snapshot block.
	 * NOTE(review): the return values of dm_sm_dec_block() are ignored.
	 */
	disk_super = dm_block_data(copy);
	dm_sm_dec_block(pmd->metadata_sm, le64_to_cpu(disk_super->data_mapping_root));
	dm_sm_dec_block(pmd->metadata_sm, le64_to_cpu(disk_super->device_details_root));
	dm_sm_dec_block(pmd->metadata_sm, held_root);

	return dm_tm_unlock(pmd->tm, copy);
}
/*
 * Release the pool's metadata snapshot.
 *
 * Holds pmd->root_lock for writing around __release_metadata_snap() and
 * returns that function's result unchanged.
 */
int dm_pool_release_metadata_snap(struct dm_pool_metadata *pmd)
{
	int err;

	down_write(&pmd->root_lock);
	err = __release_metadata_snap(pmd);
	up_write(&pmd->root_lock);

	return err;
}
/*
 * Fetch the held_root value stored in the superblock into *result.
 *
 * Read-locks the block at THIN_SUPERBLOCK_LOCATION.  Returns the lock
 * error if that fails (leaving *result untouched), otherwise the result
 * of unlocking the block.
 */
static int __get_metadata_snap(struct dm_pool_metadata *pmd,
			       dm_block_t *result)
{
	struct dm_block *sb_block;
	struct thin_disk_superblock *sb;
	int err;

	err = dm_bm_read_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION,
			      &sb_validator, &sb_block);
	if (err)
		return err;

	sb = dm_block_data(sb_block);
	*result = le64_to_cpu(sb->held_root);

	return dm_bm_unlock(sb_block);
}
int dm_pool_get_held_metadata_root(struct dm_pool_metadata *pmd,
dm_block_t *result)
int dm_pool_get_metadata_snap(struct dm_pool_metadata *pmd,
dm_block_t *result)
{
int r;
down_read(&pmd->root_lock);
r = __get_held_metadata_root(pmd, result);
r = __get_metadata_snap(pmd, result);
up_read(&pmd->root_lock);
return r;
......
......@@ -90,11 +90,18 @@ int dm_pool_get_metadata_transaction_id(struct dm_pool_metadata *pmd,
/*
* Hold/get root for userspace transaction.
*
* The metadata snapshot is a copy of the current superblock (minus the
* space maps). Userland can access the data structures for READ
* operations only. A small performance hit is incurred by providing this
* copy of the metadata to userland due to extra copy-on-write operations
* on the metadata nodes. Release this as soon as you finish with it.
*/
int dm_pool_hold_metadata_root(struct dm_pool_metadata *pmd);
int dm_pool_reserve_metadata_snap(struct dm_pool_metadata *pmd);
int dm_pool_release_metadata_snap(struct dm_pool_metadata *pmd);
int dm_pool_get_held_metadata_root(struct dm_pool_metadata *pmd,
dm_block_t *result);
int dm_pool_get_metadata_snap(struct dm_pool_metadata *pmd,
dm_block_t *result);
/*
* Actions on a single virtual device.
......
This diff is collapsed.
......@@ -249,6 +249,7 @@ int dm_tm_shadow_block(struct dm_transaction_manager *tm, dm_block_t orig,
return r;
}
EXPORT_SYMBOL_GPL(dm_tm_shadow_block);
int dm_tm_read_lock(struct dm_transaction_manager *tm, dm_block_t b,
struct dm_block_validator *v,
......@@ -259,6 +260,7 @@ int dm_tm_read_lock(struct dm_transaction_manager *tm, dm_block_t b,
return dm_bm_read_lock(tm->bm, b, v, blk);
}
EXPORT_SYMBOL_GPL(dm_tm_read_lock);
int dm_tm_unlock(struct dm_transaction_manager *tm, struct dm_block *b)
{
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment