Commit b49249d1 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'dm-3.8-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-dm

Pull dm update from Alasdair G Kergon:
 "Miscellaneous device-mapper fixes, cleanups and performance
  improvements.

  Of particular note:
   - Disable broken WRITE SAME support in all targets except linear and
     striped.  Use it when kcopyd is zeroing blocks.
   - Remove several mempools from targets by moving the data into the
     bio's new front_pad area(which dm calls 'per_bio_data').
   - Fix a race in thin provisioning if discards are misused.
   - Prevent userspace from interfering with the ioctl parameters and
     use kmalloc for the data buffer if it's small instead of vmalloc.
   - Throttle some annoying error messages when I/O fails."

* tag 'dm-3.8-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-dm: (36 commits)
  dm stripe: add WRITE SAME support
  dm: remove map_info
  dm snapshot: do not use map_context
  dm thin: dont use map_context
  dm raid1: dont use map_context
  dm flakey: dont use map_context
  dm raid1: rename read_record to bio_record
  dm: move target request nr to dm_target_io
  dm snapshot: use per_bio_data
  dm verity: use per_bio_data
  dm raid1: use per_bio_data
  dm: introduce per_bio_data
  dm kcopyd: add WRITE SAME support to dm_kcopyd_zero
  dm linear: add WRITE SAME support
  dm: add WRITE SAME support
  dm: prepare to support WRITE SAME
  dm ioctl: use kmalloc if possible
  dm ioctl: remove PF_MEMALLOC
  dm persistent data: improve improve space map block alloc failure message
  dm thin: use DMERR_LIMIT for errors
  ...
parents 10532b56 45e621d4
......@@ -207,31 +207,6 @@ void dm_cell_release(struct dm_bio_prison_cell *cell, struct bio_list *bios)
}
EXPORT_SYMBOL_GPL(dm_cell_release);
/*
* There are a couple of places where we put a bio into a cell briefly
* before taking it out again. In these situations we know that no other
* bio may be in the cell. This function releases the cell, and also does
* a sanity check.
*/
static void __cell_release_singleton(struct dm_bio_prison_cell *cell, struct bio *bio)
{
BUG_ON(cell->holder != bio);
BUG_ON(!bio_list_empty(&cell->bios));
__cell_release(cell, NULL);
}
void dm_cell_release_singleton(struct dm_bio_prison_cell *cell, struct bio *bio)
{
unsigned long flags;
struct dm_bio_prison *prison = cell->prison;
spin_lock_irqsave(&prison->lock, flags);
__cell_release_singleton(cell, bio);
spin_unlock_irqrestore(&prison->lock, flags);
}
EXPORT_SYMBOL_GPL(dm_cell_release_singleton);
/*
* Sometimes we don't want the holder, just the additional bios.
*/
......
......@@ -44,7 +44,6 @@ int dm_bio_detain(struct dm_bio_prison *prison, struct dm_cell_key *key,
struct bio *inmate, struct dm_bio_prison_cell **ref);
void dm_cell_release(struct dm_bio_prison_cell *cell, struct bio_list *bios);
void dm_cell_release_singleton(struct dm_bio_prison_cell *cell, struct bio *bio); // FIXME: bio arg not needed
void dm_cell_release_no_holder(struct dm_bio_prison_cell *cell, struct bio_list *inmates);
void dm_cell_error(struct dm_bio_prison_cell *cell);
......
......@@ -1689,8 +1689,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
return ret;
}
static int crypt_map(struct dm_target *ti, struct bio *bio,
union map_info *map_context)
static int crypt_map(struct dm_target *ti, struct bio *bio)
{
struct dm_crypt_io *io;
struct crypt_config *cc = ti->private;
......@@ -1846,7 +1845,7 @@ static int crypt_iterate_devices(struct dm_target *ti,
static struct target_type crypt_target = {
.name = "crypt",
.version = {1, 11, 0},
.version = {1, 12, 0},
.module = THIS_MODULE,
.ctr = crypt_ctr,
.dtr = crypt_dtr,
......
......@@ -274,8 +274,7 @@ static void delay_resume(struct dm_target *ti)
atomic_set(&dc->may_delay, 1);
}
static int delay_map(struct dm_target *ti, struct bio *bio,
union map_info *map_context)
static int delay_map(struct dm_target *ti, struct bio *bio)
{
struct delay_c *dc = ti->private;
......@@ -338,7 +337,7 @@ static int delay_iterate_devices(struct dm_target *ti,
static struct target_type delay_target = {
.name = "delay",
.version = {1, 1, 0},
.version = {1, 2, 0},
.module = THIS_MODULE,
.ctr = delay_ctr,
.dtr = delay_dtr,
......
......@@ -39,6 +39,10 @@ enum feature_flag_bits {
DROP_WRITES
};
struct per_bio_data {
bool bio_submitted;
};
static int parse_features(struct dm_arg_set *as, struct flakey_c *fc,
struct dm_target *ti)
{
......@@ -214,6 +218,7 @@ static int flakey_ctr(struct dm_target *ti, unsigned int argc, char **argv)
ti->num_flush_requests = 1;
ti->num_discard_requests = 1;
ti->per_bio_data_size = sizeof(struct per_bio_data);
ti->private = fc;
return 0;
......@@ -265,11 +270,12 @@ static void corrupt_bio_data(struct bio *bio, struct flakey_c *fc)
}
}
static int flakey_map(struct dm_target *ti, struct bio *bio,
union map_info *map_context)
static int flakey_map(struct dm_target *ti, struct bio *bio)
{
struct flakey_c *fc = ti->private;
unsigned elapsed;
struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data));
pb->bio_submitted = false;
/* Are we alive ? */
elapsed = (jiffies - fc->start_time) / HZ;
......@@ -277,7 +283,7 @@ static int flakey_map(struct dm_target *ti, struct bio *bio,
/*
* Flag this bio as submitted while down.
*/
map_context->ll = 1;
pb->bio_submitted = true;
/*
* Map reads as normal.
......@@ -314,17 +320,16 @@ static int flakey_map(struct dm_target *ti, struct bio *bio,
return DM_MAPIO_REMAPPED;
}
static int flakey_end_io(struct dm_target *ti, struct bio *bio,
int error, union map_info *map_context)
static int flakey_end_io(struct dm_target *ti, struct bio *bio, int error)
{
struct flakey_c *fc = ti->private;
unsigned bio_submitted_while_down = map_context->ll;
struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data));
/*
* Corrupt successful READs while in down state.
* If flags were specified, only corrupt those that match.
*/
if (fc->corrupt_bio_byte && !error && bio_submitted_while_down &&
if (fc->corrupt_bio_byte && !error && pb->bio_submitted &&
(bio_data_dir(bio) == READ) && (fc->corrupt_bio_rw == READ) &&
all_corrupt_bio_flags_match(bio, fc))
corrupt_bio_data(bio, fc);
......@@ -406,7 +411,7 @@ static int flakey_iterate_devices(struct dm_target *ti, iterate_devices_callout_
static struct target_type flakey_target = {
.name = "flakey",
.version = {1, 2, 0},
.version = {1, 3, 0},
.module = THIS_MODULE,
.ctr = flakey_ctr,
.dtr = flakey_dtr,
......
......@@ -287,7 +287,8 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where,
unsigned num_bvecs;
sector_t remaining = where->count;
struct request_queue *q = bdev_get_queue(where->bdev);
sector_t discard_sectors;
unsigned short logical_block_size = queue_logical_block_size(q);
sector_t num_sectors;
/*
* where->count may be zero if rw holds a flush and we need to
......@@ -297,7 +298,7 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where,
/*
* Allocate a suitably sized-bio.
*/
if (rw & REQ_DISCARD)
if ((rw & REQ_DISCARD) || (rw & REQ_WRITE_SAME))
num_bvecs = 1;
else
num_bvecs = min_t(int, bio_get_nr_vecs(where->bdev),
......@@ -310,9 +311,21 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where,
store_io_and_region_in_bio(bio, io, region);
if (rw & REQ_DISCARD) {
discard_sectors = min_t(sector_t, q->limits.max_discard_sectors, remaining);
bio->bi_size = discard_sectors << SECTOR_SHIFT;
remaining -= discard_sectors;
num_sectors = min_t(sector_t, q->limits.max_discard_sectors, remaining);
bio->bi_size = num_sectors << SECTOR_SHIFT;
remaining -= num_sectors;
} else if (rw & REQ_WRITE_SAME) {
/*
* WRITE SAME only uses a single page.
*/
dp->get_page(dp, &page, &len, &offset);
bio_add_page(bio, page, logical_block_size, offset);
num_sectors = min_t(sector_t, q->limits.max_write_same_sectors, remaining);
bio->bi_size = num_sectors << SECTOR_SHIFT;
offset = 0;
remaining -= num_sectors;
dp->next_page(dp);
} else while (remaining) {
/*
* Try and add as many pages as possible.
......
......@@ -1543,7 +1543,21 @@ static int check_version(unsigned int cmd, struct dm_ioctl __user *user)
return r;
}
static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl **param)
#define DM_PARAMS_VMALLOC 0x0001 /* Params alloced with vmalloc not kmalloc */
#define DM_WIPE_BUFFER 0x0010 /* Wipe input buffer before returning from ioctl */
static void free_params(struct dm_ioctl *param, size_t param_size, int param_flags)
{
if (param_flags & DM_WIPE_BUFFER)
memset(param, 0, param_size);
if (param_flags & DM_PARAMS_VMALLOC)
vfree(param);
else
kfree(param);
}
static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl **param, int *param_flags)
{
struct dm_ioctl tmp, *dmi;
int secure_data;
......@@ -1556,7 +1570,21 @@ static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl **param)
secure_data = tmp.flags & DM_SECURE_DATA_FLAG;
dmi = vmalloc(tmp.data_size);
*param_flags = secure_data ? DM_WIPE_BUFFER : 0;
/*
* Try to avoid low memory issues when a device is suspended.
* Use kmalloc() rather than vmalloc() when we can.
*/
dmi = NULL;
if (tmp.data_size <= KMALLOC_MAX_SIZE)
dmi = kmalloc(tmp.data_size, GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN);
if (!dmi) {
dmi = __vmalloc(tmp.data_size, GFP_NOIO | __GFP_REPEAT | __GFP_HIGH, PAGE_KERNEL);
*param_flags |= DM_PARAMS_VMALLOC;
}
if (!dmi) {
if (secure_data && clear_user(user, tmp.data_size))
return -EFAULT;
......@@ -1566,6 +1594,14 @@ static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl **param)
if (copy_from_user(dmi, user, tmp.data_size))
goto bad;
/*
* Abort if something changed the ioctl data while it was being copied.
*/
if (dmi->data_size != tmp.data_size) {
DMERR("rejecting ioctl: data size modified while processing parameters");
goto bad;
}
/* Wipe the user buffer so we do not return it to userspace */
if (secure_data && clear_user(user, tmp.data_size))
goto bad;
......@@ -1574,9 +1610,8 @@ static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl **param)
return 0;
bad:
if (secure_data)
memset(dmi, 0, tmp.data_size);
vfree(dmi);
free_params(dmi, tmp.data_size, *param_flags);
return -EFAULT;
}
......@@ -1613,7 +1648,7 @@ static int validate_params(uint cmd, struct dm_ioctl *param)
static int ctl_ioctl(uint command, struct dm_ioctl __user *user)
{
int r = 0;
int wipe_buffer;
int param_flags;
unsigned int cmd;
struct dm_ioctl *uninitialized_var(param);
ioctl_fn fn = NULL;
......@@ -1648,25 +1683,15 @@ static int ctl_ioctl(uint command, struct dm_ioctl __user *user)
return -ENOTTY;
}
/*
* Trying to avoid low memory issues when a device is
* suspended.
*/
current->flags |= PF_MEMALLOC;
/*
* Copy the parameters into kernel space.
*/
r = copy_params(user, &param);
current->flags &= ~PF_MEMALLOC;
r = copy_params(user, &param, &param_flags);
if (r)
return r;
input_param_size = param->data_size;
wipe_buffer = param->flags & DM_SECURE_DATA_FLAG;
r = validate_params(cmd, param);
if (r)
goto out;
......@@ -1681,10 +1706,7 @@ static int ctl_ioctl(uint command, struct dm_ioctl __user *user)
r = -EFAULT;
out:
if (wipe_buffer)
memset(param, 0, input_param_size);
vfree(param);
free_params(param, input_param_size, param_flags);
return r;
}
......
......@@ -349,7 +349,7 @@ static void complete_io(unsigned long error, void *context)
struct dm_kcopyd_client *kc = job->kc;
if (error) {
if (job->rw == WRITE)
if (job->rw & WRITE)
job->write_err |= error;
else
job->read_err = 1;
......@@ -361,7 +361,7 @@ static void complete_io(unsigned long error, void *context)
}
}
if (job->rw == WRITE)
if (job->rw & WRITE)
push(&kc->complete_jobs, job);
else {
......@@ -432,7 +432,7 @@ static int process_jobs(struct list_head *jobs, struct dm_kcopyd_client *kc,
if (r < 0) {
/* error this rogue job */
if (job->rw == WRITE)
if (job->rw & WRITE)
job->write_err = (unsigned long) -1L;
else
job->read_err = 1;
......@@ -585,6 +585,7 @@ int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from,
unsigned int flags, dm_kcopyd_notify_fn fn, void *context)
{
struct kcopyd_job *job;
int i;
/*
* Allocate an array of jobs consisting of one master job
......@@ -611,7 +612,16 @@ int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from,
memset(&job->source, 0, sizeof job->source);
job->source.count = job->dests[0].count;
job->pages = &zero_page_list;
/*
* Use WRITE SAME to optimize zeroing if all dests support it.
*/
job->rw = WRITE | REQ_WRITE_SAME;
for (i = 0; i < job->num_dests; i++)
if (!bdev_write_same(job->dests[i].bdev)) {
job->rw = WRITE;
break;
}
}
job->fn = fn;
......
......@@ -55,6 +55,7 @@ static int linear_ctr(struct dm_target *ti, unsigned int argc, char **argv)
ti->num_flush_requests = 1;
ti->num_discard_requests = 1;
ti->num_write_same_requests = 1;
ti->private = lc;
return 0;
......@@ -87,8 +88,7 @@ static void linear_map_bio(struct dm_target *ti, struct bio *bio)
bio->bi_sector = linear_map_sector(ti, bio->bi_sector);
}
static int linear_map(struct dm_target *ti, struct bio *bio,
union map_info *map_context)
static int linear_map(struct dm_target *ti, struct bio *bio)
{
linear_map_bio(ti, bio);
......@@ -155,7 +155,7 @@ static int linear_iterate_devices(struct dm_target *ti,
static struct target_type linear_target = {
.name = "linear",
.version = {1, 1, 0},
.version = {1, 2, 0},
.module = THIS_MODULE,
.ctr = linear_ctr,
.dtr = linear_dtr,
......
......@@ -295,9 +295,11 @@ static int validate_region_size(struct raid_set *rs, unsigned long region_size)
* Choose a reasonable default. All figures in sectors.
*/
if (min_region_size > (1 << 13)) {
/* If not a power of 2, make it the next power of 2 */
if (min_region_size & (min_region_size - 1))
region_size = 1 << fls(region_size);
DMINFO("Choosing default region size of %lu sectors",
region_size);
region_size = min_region_size;
} else {
DMINFO("Choosing default region size of 4MiB");
region_size = 1 << 13; /* sectors */
......@@ -1216,7 +1218,7 @@ static void raid_dtr(struct dm_target *ti)
context_free(rs);
}
static int raid_map(struct dm_target *ti, struct bio *bio, union map_info *map_context)
static int raid_map(struct dm_target *ti, struct bio *bio)
{
struct raid_set *rs = ti->private;
struct mddev *mddev = &rs->md;
......@@ -1430,7 +1432,7 @@ static void raid_resume(struct dm_target *ti)
static struct target_type raid_target = {
.name = "raid",
.version = {1, 3, 1},
.version = {1, 4, 0},
.module = THIS_MODULE,
.ctr = raid_ctr,
.dtr = raid_dtr,
......
......@@ -61,7 +61,6 @@ struct mirror_set {
struct dm_region_hash *rh;
struct dm_kcopyd_client *kcopyd_client;
struct dm_io_client *io_client;
mempool_t *read_record_pool;
/* recovery */
region_t nr_regions;
......@@ -139,14 +138,13 @@ static void dispatch_bios(void *context, struct bio_list *bio_list)
queue_bio(ms, bio, WRITE);
}
#define MIN_READ_RECORDS 20
struct dm_raid1_read_record {
struct dm_raid1_bio_record {
struct mirror *m;
/* if details->bi_bdev == NULL, details were not saved */
struct dm_bio_details details;
region_t write_region;
};
static struct kmem_cache *_dm_raid1_read_record_cache;
/*
* Every mirror should look like this one.
*/
......@@ -876,19 +874,9 @@ static struct mirror_set *alloc_context(unsigned int nr_mirrors,
atomic_set(&ms->suspend, 0);
atomic_set(&ms->default_mirror, DEFAULT_MIRROR);
ms->read_record_pool = mempool_create_slab_pool(MIN_READ_RECORDS,
_dm_raid1_read_record_cache);
if (!ms->read_record_pool) {
ti->error = "Error creating mirror read_record_pool";
kfree(ms);
return NULL;
}
ms->io_client = dm_io_client_create();
if (IS_ERR(ms->io_client)) {
ti->error = "Error creating dm_io client";
mempool_destroy(ms->read_record_pool);
kfree(ms);
return NULL;
}
......@@ -900,7 +888,6 @@ static struct mirror_set *alloc_context(unsigned int nr_mirrors,
if (IS_ERR(ms->rh)) {
ti->error = "Error creating dirty region hash";
dm_io_client_destroy(ms->io_client);
mempool_destroy(ms->read_record_pool);
kfree(ms);
return NULL;
}
......@@ -916,7 +903,6 @@ static void free_context(struct mirror_set *ms, struct dm_target *ti,
dm_io_client_destroy(ms->io_client);
dm_region_hash_destroy(ms->rh);
mempool_destroy(ms->read_record_pool);
kfree(ms);
}
......@@ -1088,6 +1074,7 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv)
ti->num_flush_requests = 1;
ti->num_discard_requests = 1;
ti->per_bio_data_size = sizeof(struct dm_raid1_bio_record);
ti->discard_zeroes_data_unsupported = true;
ms->kmirrord_wq = alloc_workqueue("kmirrord",
......@@ -1155,18 +1142,20 @@ static void mirror_dtr(struct dm_target *ti)
/*
* Mirror mapping function
*/
static int mirror_map(struct dm_target *ti, struct bio *bio,
union map_info *map_context)
static int mirror_map(struct dm_target *ti, struct bio *bio)
{
int r, rw = bio_rw(bio);
struct mirror *m;
struct mirror_set *ms = ti->private;
struct dm_raid1_read_record *read_record = NULL;
struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh);
struct dm_raid1_bio_record *bio_record =
dm_per_bio_data(bio, sizeof(struct dm_raid1_bio_record));
bio_record->details.bi_bdev = NULL;
if (rw == WRITE) {
/* Save region for mirror_end_io() handler */
map_context->ll = dm_rh_bio_to_region(ms->rh, bio);
bio_record->write_region = dm_rh_bio_to_region(ms->rh, bio);
queue_bio(ms, bio, rw);
return DM_MAPIO_SUBMITTED;
}
......@@ -1194,33 +1183,29 @@ static int mirror_map(struct dm_target *ti, struct bio *bio,
if (unlikely(!m))
return -EIO;
read_record = mempool_alloc(ms->read_record_pool, GFP_NOIO);
if (likely(read_record)) {
dm_bio_record(&read_record->details, bio);
map_context->ptr = read_record;
read_record->m = m;
}
dm_bio_record(&bio_record->details, bio);
bio_record->m = m;
map_bio(m, bio);
return DM_MAPIO_REMAPPED;
}
static int mirror_end_io(struct dm_target *ti, struct bio *bio,
int error, union map_info *map_context)
static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error)
{
int rw = bio_rw(bio);
struct mirror_set *ms = (struct mirror_set *) ti->private;
struct mirror *m = NULL;
struct dm_bio_details *bd = NULL;
struct dm_raid1_read_record *read_record = map_context->ptr;
struct dm_raid1_bio_record *bio_record =
dm_per_bio_data(bio, sizeof(struct dm_raid1_bio_record));
/*
* We need to dec pending if this was a write.
*/
if (rw == WRITE) {
if (!(bio->bi_rw & (REQ_FLUSH | REQ_DISCARD)))
dm_rh_dec(ms->rh, map_context->ll);
dm_rh_dec(ms->rh, bio_record->write_region);
return error;
}
......@@ -1231,7 +1216,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio,
goto out;
if (unlikely(error)) {
if (!read_record) {
if (!bio_record->details.bi_bdev) {
/*
* There wasn't enough memory to record necessary
* information for a retry or there was no other
......@@ -1241,7 +1226,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio,
return -EIO;
}
m = read_record->m;
m = bio_record->m;
DMERR("Mirror read failed from %s. Trying alternative device.",
m->dev->name);
......@@ -1253,22 +1238,18 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio,
* mirror.
*/
if (default_ok(m) || mirror_available(ms, bio)) {
bd = &read_record->details;
bd = &bio_record->details;
dm_bio_restore(bd, bio);
mempool_free(read_record, ms->read_record_pool);
map_context->ptr = NULL;
bio_record->details.bi_bdev = NULL;
queue_bio(ms, bio, rw);
return 1;
return DM_ENDIO_INCOMPLETE;
}
DMERR("All replicated volumes dead, failing I/O");
}
out:
if (read_record) {
mempool_free(read_record, ms->read_record_pool);
map_context->ptr = NULL;
}
bio_record->details.bi_bdev = NULL;
return error;
}
......@@ -1422,7 +1403,7 @@ static int mirror_iterate_devices(struct dm_target *ti,
static struct target_type mirror_target = {
.name = "mirror",
.version = {1, 12, 1},
.version = {1, 13, 1},
.module = THIS_MODULE,
.ctr = mirror_ctr,
.dtr = mirror_dtr,
......@@ -1439,13 +1420,6 @@ static int __init dm_mirror_init(void)
{
int r;
_dm_raid1_read_record_cache = KMEM_CACHE(dm_raid1_read_record, 0);
if (!_dm_raid1_read_record_cache) {
DMERR("Can't allocate dm_raid1_read_record cache");
r = -ENOMEM;
goto bad_cache;
}
r = dm_register_target(&mirror_target);
if (r < 0) {
DMERR("Failed to register mirror target");
......@@ -1455,15 +1429,12 @@ static int __init dm_mirror_init(void)
return 0;
bad_target:
kmem_cache_destroy(_dm_raid1_read_record_cache);
bad_cache:
return r;
}
static void __exit dm_mirror_exit(void)
{
dm_unregister_target(&mirror_target);
kmem_cache_destroy(_dm_raid1_read_record_cache);
}
/* Module hooks */
......
......@@ -79,7 +79,6 @@ struct dm_snapshot {
/* Chunks with outstanding reads */
spinlock_t tracked_chunk_lock;
mempool_t *tracked_chunk_pool;
struct hlist_head tracked_chunk_hash[DM_TRACKED_CHUNK_HASH_SIZE];
/* The on disk metadata handler */
......@@ -191,35 +190,38 @@ struct dm_snap_tracked_chunk {
chunk_t chunk;
};
static struct kmem_cache *tracked_chunk_cache;
static void init_tracked_chunk(struct bio *bio)
{
struct dm_snap_tracked_chunk *c = dm_per_bio_data(bio, sizeof(struct dm_snap_tracked_chunk));
INIT_HLIST_NODE(&c->node);
}
static struct dm_snap_tracked_chunk *track_chunk(struct dm_snapshot *s,
chunk_t chunk)
static bool is_bio_tracked(struct bio *bio)
{
struct dm_snap_tracked_chunk *c = mempool_alloc(s->tracked_chunk_pool,
GFP_NOIO);
unsigned long flags;
struct dm_snap_tracked_chunk *c = dm_per_bio_data(bio, sizeof(struct dm_snap_tracked_chunk));
return !hlist_unhashed(&c->node);
}
static void track_chunk(struct dm_snapshot *s, struct bio *bio, chunk_t chunk)
{
struct dm_snap_tracked_chunk *c = dm_per_bio_data(bio, sizeof(struct dm_snap_tracked_chunk));
c->chunk = chunk;
spin_lock_irqsave(&s->tracked_chunk_lock, flags);
spin_lock_irq(&s->tracked_chunk_lock);
hlist_add_head(&c->node,
&s->tracked_chunk_hash[DM_TRACKED_CHUNK_HASH(chunk)]);
spin_unlock_irqrestore(&s->tracked_chunk_lock, flags);
return c;
spin_unlock_irq(&s->tracked_chunk_lock);
}
static void stop_tracking_chunk(struct dm_snapshot *s,
struct dm_snap_tracked_chunk *c)
static void stop_tracking_chunk(struct dm_snapshot *s, struct bio *bio)
{
struct dm_snap_tracked_chunk *c = dm_per_bio_data(bio, sizeof(struct dm_snap_tracked_chunk));
unsigned long flags;
spin_lock_irqsave(&s->tracked_chunk_lock, flags);
hlist_del(&c->node);
spin_unlock_irqrestore(&s->tracked_chunk_lock, flags);
mempool_free(c, s->tracked_chunk_pool);
}
static int __chunk_is_tracked(struct dm_snapshot *s, chunk_t chunk)
......@@ -1120,14 +1122,6 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
goto bad_pending_pool;
}
s->tracked_chunk_pool = mempool_create_slab_pool(MIN_IOS,
tracked_chunk_cache);
if (!s->tracked_chunk_pool) {
ti->error = "Could not allocate tracked_chunk mempool for "
"tracking reads";
goto bad_tracked_chunk_pool;
}
for (i = 0; i < DM_TRACKED_CHUNK_HASH_SIZE; i++)
INIT_HLIST_HEAD(&s->tracked_chunk_hash[i]);
......@@ -1135,6 +1129,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
ti->private = s;
ti->num_flush_requests = num_flush_requests;
ti->per_bio_data_size = sizeof(struct dm_snap_tracked_chunk);
/* Add snapshot to the list of snapshots for this origin */
/* Exceptions aren't triggered till snapshot_resume() is called */
......@@ -1183,9 +1178,6 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
unregister_snapshot(s);
bad_load_and_register:
mempool_destroy(s->tracked_chunk_pool);
bad_tracked_chunk_pool:
mempool_destroy(s->pending_pool);
bad_pending_pool:
......@@ -1290,8 +1282,6 @@ static void snapshot_dtr(struct dm_target *ti)
BUG_ON(!hlist_empty(&s->tracked_chunk_hash[i]));
#endif
mempool_destroy(s->tracked_chunk_pool);
__free_exceptions(s);
mempool_destroy(s->pending_pool);
......@@ -1577,8 +1567,7 @@ static void remap_exception(struct dm_snapshot *s, struct dm_exception *e,
s->store->chunk_mask);
}
static int snapshot_map(struct dm_target *ti, struct bio *bio,
union map_info *map_context)
static int snapshot_map(struct dm_target *ti, struct bio *bio)
{
struct dm_exception *e;
struct dm_snapshot *s = ti->private;
......@@ -1586,6 +1575,8 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio,
chunk_t chunk;
struct dm_snap_pending_exception *pe = NULL;
init_tracked_chunk(bio);
if (bio->bi_rw & REQ_FLUSH) {
bio->bi_bdev = s->cow->bdev;
return DM_MAPIO_REMAPPED;
......@@ -1670,7 +1661,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio,
}
} else {
bio->bi_bdev = s->origin->bdev;
map_context->ptr = track_chunk(s, chunk);
track_chunk(s, bio, chunk);
}
out_unlock:
......@@ -1691,20 +1682,20 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio,
* If merging is currently taking place on the chunk in question, the
* I/O is deferred by adding it to s->bios_queued_during_merge.
*/
static int snapshot_merge_map(struct dm_target *ti, struct bio *bio,
union map_info *map_context)
static int snapshot_merge_map(struct dm_target *ti, struct bio *bio)
{
struct dm_exception *e;
struct dm_snapshot *s = ti->private;
int r = DM_MAPIO_REMAPPED;
chunk_t chunk;
init_tracked_chunk(bio);
if (bio->bi_rw & REQ_FLUSH) {
if (!map_context->target_request_nr)
if (!dm_bio_get_target_request_nr(bio))
bio->bi_bdev = s->origin->bdev;
else
bio->bi_bdev = s->cow->bdev;
map_context->ptr = NULL;
return DM_MAPIO_REMAPPED;
}
......@@ -1733,7 +1724,7 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio,
remap_exception(s, e, bio, chunk);
if (bio_rw(bio) == WRITE)
map_context->ptr = track_chunk(s, chunk);
track_chunk(s, bio, chunk);
goto out_unlock;
}
......@@ -1751,14 +1742,12 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio,
return r;
}
static int snapshot_end_io(struct dm_target *ti, struct bio *bio,
int error, union map_info *map_context)
static int snapshot_end_io(struct dm_target *ti, struct bio *bio, int error)
{
struct dm_snapshot *s = ti->private;
struct dm_snap_tracked_chunk *c = map_context->ptr;
if (c)
stop_tracking_chunk(s, c);
if (is_bio_tracked(bio))
stop_tracking_chunk(s, bio);
return 0;
}
......@@ -2127,8 +2116,7 @@ static void origin_dtr(struct dm_target *ti)
dm_put_device(ti, dev);
}
static int origin_map(struct dm_target *ti, struct bio *bio,
union map_info *map_context)
static int origin_map(struct dm_target *ti, struct bio *bio)
{
struct dm_dev *dev = ti->private;
bio->bi_bdev = dev->bdev;
......@@ -2193,7 +2181,7 @@ static int origin_iterate_devices(struct dm_target *ti,
static struct target_type origin_target = {
.name = "snapshot-origin",
.version = {1, 7, 1},
.version = {1, 8, 0},
.module = THIS_MODULE,
.ctr = origin_ctr,
.dtr = origin_dtr,
......@@ -2206,7 +2194,7 @@ static struct target_type origin_target = {
static struct target_type snapshot_target = {
.name = "snapshot",
.version = {1, 10, 0},
.version = {1, 11, 0},
.module = THIS_MODULE,
.ctr = snapshot_ctr,
.dtr = snapshot_dtr,
......@@ -2220,7 +2208,7 @@ static struct target_type snapshot_target = {
static struct target_type merge_target = {
.name = dm_snapshot_merge_target_name,
.version = {1, 1, 0},
.version = {1, 2, 0},
.module = THIS_MODULE,
.ctr = snapshot_ctr,
.dtr = snapshot_dtr,
......@@ -2281,17 +2269,8 @@ static int __init dm_snapshot_init(void)
goto bad_pending_cache;
}
tracked_chunk_cache = KMEM_CACHE(dm_snap_tracked_chunk, 0);
if (!tracked_chunk_cache) {
DMERR("Couldn't create cache to track chunks in use.");
r = -ENOMEM;
goto bad_tracked_chunk_cache;
}
return 0;
bad_tracked_chunk_cache:
kmem_cache_destroy(pending_cache);
bad_pending_cache:
kmem_cache_destroy(exception_cache);
bad_exception_cache:
......@@ -2317,7 +2296,6 @@ static void __exit dm_snapshot_exit(void)
exit_origin_hash();
kmem_cache_destroy(pending_cache);
kmem_cache_destroy(exception_cache);
kmem_cache_destroy(tracked_chunk_cache);
dm_exception_store_exit();
}
......
......@@ -162,6 +162,7 @@ static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv)
ti->num_flush_requests = stripes;
ti->num_discard_requests = stripes;
ti->num_write_same_requests = stripes;
sc->chunk_size = chunk_size;
if (chunk_size & (chunk_size - 1))
......@@ -251,7 +252,7 @@ static void stripe_map_range_sector(struct stripe_c *sc, sector_t sector,
*result += sc->chunk_size; /* next chunk */
}
static int stripe_map_discard(struct stripe_c *sc, struct bio *bio,
static int stripe_map_range(struct stripe_c *sc, struct bio *bio,
uint32_t target_stripe)
{
sector_t begin, end;
......@@ -271,23 +272,23 @@ static int stripe_map_discard(struct stripe_c *sc, struct bio *bio,
}
}
static int stripe_map(struct dm_target *ti, struct bio *bio,
union map_info *map_context)
static int stripe_map(struct dm_target *ti, struct bio *bio)
{
struct stripe_c *sc = ti->private;
uint32_t stripe;
unsigned target_request_nr;
if (bio->bi_rw & REQ_FLUSH) {
target_request_nr = map_context->target_request_nr;
target_request_nr = dm_bio_get_target_request_nr(bio);
BUG_ON(target_request_nr >= sc->stripes);
bio->bi_bdev = sc->stripe[target_request_nr].dev->bdev;
return DM_MAPIO_REMAPPED;
}
if (unlikely(bio->bi_rw & REQ_DISCARD)) {
target_request_nr = map_context->target_request_nr;
if (unlikely(bio->bi_rw & REQ_DISCARD) ||
unlikely(bio->bi_rw & REQ_WRITE_SAME)) {
target_request_nr = dm_bio_get_target_request_nr(bio);
BUG_ON(target_request_nr >= sc->stripes);
return stripe_map_discard(sc, bio, target_request_nr);
return stripe_map_range(sc, bio, target_request_nr);
}
stripe_map_sector(sc, bio->bi_sector, &stripe, &bio->bi_sector);
......@@ -342,8 +343,7 @@ static int stripe_status(struct dm_target *ti, status_type_t type,
return 0;
}
static int stripe_end_io(struct dm_target *ti, struct bio *bio,
int error, union map_info *map_context)
static int stripe_end_io(struct dm_target *ti, struct bio *bio, int error)
{
unsigned i;
char major_minor[16];
......
......@@ -967,13 +967,22 @@ bool dm_table_request_based(struct dm_table *t)
int dm_table_alloc_md_mempools(struct dm_table *t)
{
unsigned type = dm_table_get_type(t);
unsigned per_bio_data_size = 0;
struct dm_target *tgt;
unsigned i;
if (unlikely(type == DM_TYPE_NONE)) {
DMWARN("no table type is set, can't allocate mempools");
return -EINVAL;
}
t->mempools = dm_alloc_md_mempools(type, t->integrity_supported);
if (type == DM_TYPE_BIO_BASED)
for (i = 0; i < t->num_targets; i++) {
tgt = t->targets + i;
per_bio_data_size = max(per_bio_data_size, tgt->per_bio_data_size);
}
t->mempools = dm_alloc_md_mempools(type, t->integrity_supported, per_bio_data_size);
if (!t->mempools)
return -ENOMEM;
......@@ -1414,6 +1423,33 @@ static bool dm_table_all_devices_attribute(struct dm_table *t,
return 1;
}
static int device_not_write_same_capable(struct dm_target *ti, struct dm_dev *dev,
sector_t start, sector_t len, void *data)
{
struct request_queue *q = bdev_get_queue(dev->bdev);
return q && !q->limits.max_write_same_sectors;
}
static bool dm_table_supports_write_same(struct dm_table *t)
{
struct dm_target *ti;
unsigned i = 0;
while (i < dm_table_get_num_targets(t)) {
ti = dm_table_get_target(t, i++);
if (!ti->num_write_same_requests)
return false;
if (!ti->type->iterate_devices ||
!ti->type->iterate_devices(ti, device_not_write_same_capable, NULL))
return false;
}
return true;
}
void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
struct queue_limits *limits)
{
......@@ -1445,6 +1481,9 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
else
queue_flag_clear_unlocked(QUEUE_FLAG_NONROT, q);
if (!dm_table_supports_write_same(t))
q->limits.max_write_same_sectors = 0;
dm_table_set_integrity(t);
/*
......
......@@ -126,15 +126,14 @@ static void io_err_dtr(struct dm_target *tt)
/* empty */
}
static int io_err_map(struct dm_target *tt, struct bio *bio,
union map_info *map_context)
static int io_err_map(struct dm_target *tt, struct bio *bio)
{
return -EIO;
}
static struct target_type error_target = {
.name = "error",
.version = {1, 0, 1},
.version = {1, 1, 0},
.ctr = io_err_ctr,
.dtr = io_err_dtr,
.map = io_err_map,
......
......@@ -408,7 +408,7 @@ static void __setup_btree_details(struct dm_pool_metadata *pmd)
pmd->tl_info.tm = pmd->tm;
pmd->tl_info.levels = 1;
pmd->tl_info.value_type.context = &pmd->info;
pmd->tl_info.value_type.context = &pmd->bl_info;
pmd->tl_info.value_type.size = sizeof(__le64);
pmd->tl_info.value_type.inc = subtree_inc;
pmd->tl_info.value_type.dec = subtree_dec;
......
......@@ -186,7 +186,6 @@ struct pool {
struct dm_thin_new_mapping *next_mapping;
mempool_t *mapping_pool;
mempool_t *endio_hook_pool;
process_bio_fn process_bio;
process_bio_fn process_discard;
......@@ -304,7 +303,7 @@ static void __requeue_bio_list(struct thin_c *tc, struct bio_list *master)
bio_list_init(master);
while ((bio = bio_list_pop(&bios))) {
struct dm_thin_endio_hook *h = dm_get_mapinfo(bio)->ptr;
struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
if (h->tc == tc)
bio_endio(bio, DM_ENDIO_REQUEUE);
......@@ -368,6 +367,17 @@ static int bio_triggers_commit(struct thin_c *tc, struct bio *bio)
dm_thin_changed_this_transaction(tc->td);
}
static void inc_all_io_entry(struct pool *pool, struct bio *bio)
{
struct dm_thin_endio_hook *h;
if (bio->bi_rw & REQ_DISCARD)
return;
h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
h->all_io_entry = dm_deferred_entry_inc(pool->all_io_ds);
}
static void issue(struct thin_c *tc, struct bio *bio)
{
struct pool *pool = tc->pool;
......@@ -474,7 +484,7 @@ static void copy_complete(int read_err, unsigned long write_err, void *context)
static void overwrite_endio(struct bio *bio, int err)
{
unsigned long flags;
struct dm_thin_endio_hook *h = dm_get_mapinfo(bio)->ptr;
struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
struct dm_thin_new_mapping *m = h->overwrite_mapping;
struct pool *pool = m->tc->pool;
......@@ -499,8 +509,7 @@ static void overwrite_endio(struct bio *bio, int err)
/*
* This sends the bios in the cell back to the deferred_bios list.
*/
static void cell_defer(struct thin_c *tc, struct dm_bio_prison_cell *cell,
dm_block_t data_block)
static void cell_defer(struct thin_c *tc, struct dm_bio_prison_cell *cell)
{
struct pool *pool = tc->pool;
unsigned long flags;
......@@ -513,17 +522,13 @@ static void cell_defer(struct thin_c *tc, struct dm_bio_prison_cell *cell,
}
/*
* Same as cell_defer above, except it omits one particular detainee,
* a write bio that covers the block and has already been processed.
* Same as cell_defer except it omits the original holder of the cell.
*/
static void cell_defer_except(struct thin_c *tc, struct dm_bio_prison_cell *cell)
static void cell_defer_no_holder(struct thin_c *tc, struct dm_bio_prison_cell *cell)
{
struct bio_list bios;
struct pool *pool = tc->pool;
unsigned long flags;
bio_list_init(&bios);
spin_lock_irqsave(&pool->lock, flags);
dm_cell_release_no_holder(cell, &pool->deferred_bios);
spin_unlock_irqrestore(&pool->lock, flags);
......@@ -561,7 +566,7 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m)
*/
r = dm_thin_insert_block(tc->td, m->virt_block, m->data_block);
if (r) {
DMERR("dm_thin_insert_block() failed");
DMERR_LIMIT("dm_thin_insert_block() failed");
dm_cell_error(m->cell);
goto out;
}
......@@ -573,10 +578,10 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m)
* the bios in the cell.
*/
if (bio) {
cell_defer_except(tc, m->cell);
cell_defer_no_holder(tc, m->cell);
bio_endio(bio, 0);
} else
cell_defer(tc, m->cell, m->data_block);
cell_defer(tc, m->cell);
out:
list_del(&m->list);
......@@ -588,8 +593,8 @@ static void process_prepared_discard_fail(struct dm_thin_new_mapping *m)
struct thin_c *tc = m->tc;
bio_io_error(m->bio);
cell_defer_except(tc, m->cell);
cell_defer_except(tc, m->cell2);
cell_defer_no_holder(tc, m->cell);
cell_defer_no_holder(tc, m->cell2);
mempool_free(m, tc->pool->mapping_pool);
}
......@@ -597,13 +602,15 @@ static void process_prepared_discard_passdown(struct dm_thin_new_mapping *m)
{
struct thin_c *tc = m->tc;
inc_all_io_entry(tc->pool, m->bio);
cell_defer_no_holder(tc, m->cell);
cell_defer_no_holder(tc, m->cell2);
if (m->pass_discard)
remap_and_issue(tc, m->bio, m->data_block);
else
bio_endio(m->bio, 0);
cell_defer_except(tc, m->cell);
cell_defer_except(tc, m->cell2);
mempool_free(m, tc->pool->mapping_pool);
}
......@@ -614,7 +621,7 @@ static void process_prepared_discard(struct dm_thin_new_mapping *m)
r = dm_thin_remove_block(tc->td, m->virt_block);
if (r)
DMERR("dm_thin_remove_block() failed");
DMERR_LIMIT("dm_thin_remove_block() failed");
process_prepared_discard_passdown(m);
}
......@@ -706,11 +713,12 @@ static void schedule_copy(struct thin_c *tc, dm_block_t virt_block,
* bio immediately. Otherwise we use kcopyd to clone the data first.
*/
if (io_overwrites_block(pool, bio)) {
struct dm_thin_endio_hook *h = dm_get_mapinfo(bio)->ptr;
struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
h->overwrite_mapping = m;
m->bio = bio;
save_and_set_endio(bio, &m->saved_bi_end_io, overwrite_endio);
inc_all_io_entry(pool, bio);
remap_and_issue(tc, bio, data_dest);
} else {
struct dm_io_region from, to;
......@@ -727,7 +735,7 @@ static void schedule_copy(struct thin_c *tc, dm_block_t virt_block,
0, copy_complete, m);
if (r < 0) {
mempool_free(m, pool->mapping_pool);
DMERR("dm_kcopyd_copy() failed");
DMERR_LIMIT("dm_kcopyd_copy() failed");
dm_cell_error(cell);
}
}
......@@ -775,11 +783,12 @@ static void schedule_zero(struct thin_c *tc, dm_block_t virt_block,
process_prepared_mapping(m);
else if (io_overwrites_block(pool, bio)) {
struct dm_thin_endio_hook *h = dm_get_mapinfo(bio)->ptr;
struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
h->overwrite_mapping = m;
m->bio = bio;
save_and_set_endio(bio, &m->saved_bi_end_io, overwrite_endio);
inc_all_io_entry(pool, bio);
remap_and_issue(tc, bio, data_block);
} else {
int r;
......@@ -792,7 +801,7 @@ static void schedule_zero(struct thin_c *tc, dm_block_t virt_block,
r = dm_kcopyd_zero(pool->copier, 1, &to, 0, copy_complete, m);
if (r < 0) {
mempool_free(m, pool->mapping_pool);
DMERR("dm_kcopyd_zero() failed");
DMERR_LIMIT("dm_kcopyd_zero() failed");
dm_cell_error(cell);
}
}
......@@ -804,7 +813,7 @@ static int commit(struct pool *pool)
r = dm_pool_commit_metadata(pool->pmd);
if (r)
DMERR("commit failed, error = %d", r);
DMERR_LIMIT("commit failed: error = %d", r);
return r;
}
......@@ -889,7 +898,7 @@ static int alloc_data_block(struct thin_c *tc, dm_block_t *result)
*/
static void retry_on_resume(struct bio *bio)
{
struct dm_thin_endio_hook *h = dm_get_mapinfo(bio)->ptr;
struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
struct thin_c *tc = h->tc;
struct pool *pool = tc->pool;
unsigned long flags;
......@@ -936,7 +945,7 @@ static void process_discard(struct thin_c *tc, struct bio *bio)
*/
build_data_key(tc->td, lookup_result.block, &key2);
if (dm_bio_detain(tc->pool->prison, &key2, bio, &cell2)) {
dm_cell_release_singleton(cell, bio);
cell_defer_no_holder(tc, cell);
break;
}
......@@ -962,13 +971,15 @@ static void process_discard(struct thin_c *tc, struct bio *bio)
wake_worker(pool);
}
} else {
inc_all_io_entry(pool, bio);
cell_defer_no_holder(tc, cell);
cell_defer_no_holder(tc, cell2);
/*
* The DM core makes sure that the discard doesn't span
* a block boundary. So we submit the discard of a
* partial block appropriately.
*/
dm_cell_release_singleton(cell, bio);
dm_cell_release_singleton(cell2, bio);
if ((!lookup_result.shared) && pool->pf.discard_passdown)
remap_and_issue(tc, bio, lookup_result.block);
else
......@@ -980,13 +991,14 @@ static void process_discard(struct thin_c *tc, struct bio *bio)
/*
* It isn't provisioned, just forget it.
*/
dm_cell_release_singleton(cell, bio);
cell_defer_no_holder(tc, cell);
bio_endio(bio, 0);
break;
default:
DMERR("discard: find block unexpectedly returned %d", r);
dm_cell_release_singleton(cell, bio);
DMERR_LIMIT("%s: dm_thin_find_block() failed: error = %d",
__func__, r);
cell_defer_no_holder(tc, cell);
bio_io_error(bio);
break;
}
......@@ -1012,7 +1024,8 @@ static void break_sharing(struct thin_c *tc, struct bio *bio, dm_block_t block,
break;
default:
DMERR("%s: alloc_data_block() failed, error = %d", __func__, r);
DMERR_LIMIT("%s: alloc_data_block() failed: error = %d",
__func__, r);
dm_cell_error(cell);
break;
}
......@@ -1037,11 +1050,12 @@ static void process_shared_bio(struct thin_c *tc, struct bio *bio,
if (bio_data_dir(bio) == WRITE && bio->bi_size)
break_sharing(tc, bio, block, &key, lookup_result, cell);
else {
struct dm_thin_endio_hook *h = dm_get_mapinfo(bio)->ptr;
struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
h->shared_read_entry = dm_deferred_entry_inc(pool->shared_read_ds);
inc_all_io_entry(pool, bio);
cell_defer_no_holder(tc, cell);
dm_cell_release_singleton(cell, bio);
remap_and_issue(tc, bio, lookup_result->block);
}
}
......@@ -1056,7 +1070,9 @@ static void provision_block(struct thin_c *tc, struct bio *bio, dm_block_t block
* Remap empty bios (flushes) immediately, without provisioning.
*/
if (!bio->bi_size) {
dm_cell_release_singleton(cell, bio);
inc_all_io_entry(tc->pool, bio);
cell_defer_no_holder(tc, cell);
remap_and_issue(tc, bio, 0);
return;
}
......@@ -1066,7 +1082,7 @@ static void provision_block(struct thin_c *tc, struct bio *bio, dm_block_t block
*/
if (bio_data_dir(bio) == READ) {
zero_fill_bio(bio);
dm_cell_release_singleton(cell, bio);
cell_defer_no_holder(tc, cell);
bio_endio(bio, 0);
return;
}
......@@ -1085,7 +1101,8 @@ static void provision_block(struct thin_c *tc, struct bio *bio, dm_block_t block
break;
default:
DMERR("%s: alloc_data_block() failed, error = %d", __func__, r);
DMERR_LIMIT("%s: alloc_data_block() failed: error = %d",
__func__, r);
set_pool_mode(tc->pool, PM_READ_ONLY);
dm_cell_error(cell);
break;
......@@ -1111,34 +1128,31 @@ static void process_bio(struct thin_c *tc, struct bio *bio)
r = dm_thin_find_block(tc->td, block, 1, &lookup_result);
switch (r) {
case 0:
/*
* We can release this cell now. This thread is the only
* one that puts bios into a cell, and we know there were
* no preceding bios.
*/
/*
* TODO: this will probably have to change when discard goes
* back in.
*/
dm_cell_release_singleton(cell, bio);
if (lookup_result.shared)
if (lookup_result.shared) {
process_shared_bio(tc, bio, block, &lookup_result);
else
cell_defer_no_holder(tc, cell);
} else {
inc_all_io_entry(tc->pool, bio);
cell_defer_no_holder(tc, cell);
remap_and_issue(tc, bio, lookup_result.block);
}
break;
case -ENODATA:
if (bio_data_dir(bio) == READ && tc->origin_dev) {
dm_cell_release_singleton(cell, bio);
inc_all_io_entry(tc->pool, bio);
cell_defer_no_holder(tc, cell);
remap_to_origin_and_issue(tc, bio);
} else
provision_block(tc, bio, block, cell);
break;
default:
DMERR("dm_thin_find_block() failed, error = %d", r);
dm_cell_release_singleton(cell, bio);
DMERR_LIMIT("%s: dm_thin_find_block() failed: error = %d",
__func__, r);
cell_defer_no_holder(tc, cell);
bio_io_error(bio);
break;
}
......@@ -1156,8 +1170,10 @@ static void process_bio_read_only(struct thin_c *tc, struct bio *bio)
case 0:
if (lookup_result.shared && (rw == WRITE) && bio->bi_size)
bio_io_error(bio);
else
else {
inc_all_io_entry(tc->pool, bio);
remap_and_issue(tc, bio, lookup_result.block);
}
break;
case -ENODATA:
......@@ -1167,6 +1183,7 @@ static void process_bio_read_only(struct thin_c *tc, struct bio *bio)
}
if (tc->origin_dev) {
inc_all_io_entry(tc->pool, bio);
remap_to_origin_and_issue(tc, bio);
break;
}
......@@ -1176,7 +1193,8 @@ static void process_bio_read_only(struct thin_c *tc, struct bio *bio)
break;
default:
DMERR("dm_thin_find_block() failed, error = %d", r);
DMERR_LIMIT("%s: dm_thin_find_block() failed: error = %d",
__func__, r);
bio_io_error(bio);
break;
}
......@@ -1207,7 +1225,7 @@ static void process_deferred_bios(struct pool *pool)
spin_unlock_irqrestore(&pool->lock, flags);
while ((bio = bio_list_pop(&bios))) {
struct dm_thin_endio_hook *h = dm_get_mapinfo(bio)->ptr;
struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
struct thin_c *tc = h->tc;
/*
......@@ -1340,32 +1358,30 @@ static void thin_defer_bio(struct thin_c *tc, struct bio *bio)
wake_worker(pool);
}
static struct dm_thin_endio_hook *thin_hook_bio(struct thin_c *tc, struct bio *bio)
static void thin_hook_bio(struct thin_c *tc, struct bio *bio)
{
struct pool *pool = tc->pool;
struct dm_thin_endio_hook *h = mempool_alloc(pool->endio_hook_pool, GFP_NOIO);
struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
h->tc = tc;
h->shared_read_entry = NULL;
h->all_io_entry = bio->bi_rw & REQ_DISCARD ? NULL : dm_deferred_entry_inc(pool->all_io_ds);
h->all_io_entry = NULL;
h->overwrite_mapping = NULL;
return h;
}
/*
* Non-blocking function called from the thin target's map function.
*/
static int thin_bio_map(struct dm_target *ti, struct bio *bio,
union map_info *map_context)
static int thin_bio_map(struct dm_target *ti, struct bio *bio)
{
int r;
struct thin_c *tc = ti->private;
dm_block_t block = get_bio_block(tc, bio);
struct dm_thin_device *td = tc->td;
struct dm_thin_lookup_result result;
struct dm_bio_prison_cell *cell1, *cell2;
struct dm_cell_key key;
map_context->ptr = thin_hook_bio(tc, bio);
thin_hook_bio(tc, bio);
if (get_pool_mode(tc->pool) == PM_FAIL) {
bio_io_error(bio);
......@@ -1400,12 +1416,25 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio,
* shared flag will be set in their case.
*/
thin_defer_bio(tc, bio);
r = DM_MAPIO_SUBMITTED;
} else {
remap(tc, bio, result.block);
r = DM_MAPIO_REMAPPED;
return DM_MAPIO_SUBMITTED;
}
break;
build_virtual_key(tc->td, block, &key);
if (dm_bio_detain(tc->pool->prison, &key, bio, &cell1))
return DM_MAPIO_SUBMITTED;
build_data_key(tc->td, result.block, &key);
if (dm_bio_detain(tc->pool->prison, &key, bio, &cell2)) {
cell_defer_no_holder(tc, cell1);
return DM_MAPIO_SUBMITTED;
}
inc_all_io_entry(tc->pool, bio);
cell_defer_no_holder(tc, cell2);
cell_defer_no_holder(tc, cell1);
remap(tc, bio, result.block);
return DM_MAPIO_REMAPPED;
case -ENODATA:
if (get_pool_mode(tc->pool) == PM_READ_ONLY) {
......@@ -1414,8 +1443,7 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio,
* of doing so. Just error it.
*/
bio_io_error(bio);
r = DM_MAPIO_SUBMITTED;
break;
return DM_MAPIO_SUBMITTED;
}
/* fall through */
......@@ -1425,8 +1453,7 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio,
* provide the hint to load the metadata into cache.
*/
thin_defer_bio(tc, bio);
r = DM_MAPIO_SUBMITTED;
break;
return DM_MAPIO_SUBMITTED;
default:
/*
......@@ -1435,11 +1462,8 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio,
* pool is switched to fail-io mode.
*/
bio_io_error(bio);
r = DM_MAPIO_SUBMITTED;
break;
return DM_MAPIO_SUBMITTED;
}
return r;
}
static int pool_is_congested(struct dm_target_callbacks *cb, int bdi_bits)
......@@ -1566,14 +1590,12 @@ static void __pool_destroy(struct pool *pool)
if (pool->next_mapping)
mempool_free(pool->next_mapping, pool->mapping_pool);
mempool_destroy(pool->mapping_pool);
mempool_destroy(pool->endio_hook_pool);
dm_deferred_set_destroy(pool->shared_read_ds);
dm_deferred_set_destroy(pool->all_io_ds);
kfree(pool);
}
static struct kmem_cache *_new_mapping_cache;
static struct kmem_cache *_endio_hook_cache;
static struct pool *pool_create(struct mapped_device *pool_md,
struct block_device *metadata_dev,
......@@ -1667,13 +1689,6 @@ static struct pool *pool_create(struct mapped_device *pool_md,
goto bad_mapping_pool;
}
pool->endio_hook_pool = mempool_create_slab_pool(ENDIO_HOOK_POOL_SIZE,
_endio_hook_cache);
if (!pool->endio_hook_pool) {
*error = "Error creating pool's endio_hook mempool";
err_p = ERR_PTR(-ENOMEM);
goto bad_endio_hook_pool;
}
pool->ref_count = 1;
pool->last_commit_jiffies = jiffies;
pool->pool_md = pool_md;
......@@ -1682,8 +1697,6 @@ static struct pool *pool_create(struct mapped_device *pool_md,
return pool;
bad_endio_hook_pool:
mempool_destroy(pool->mapping_pool);
bad_mapping_pool:
dm_deferred_set_destroy(pool->all_io_ds);
bad_all_io_ds:
......@@ -1966,8 +1979,7 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
return r;
}
static int pool_map(struct dm_target *ti, struct bio *bio,
union map_info *map_context)
static int pool_map(struct dm_target *ti, struct bio *bio)
{
int r;
struct pool_c *pt = ti->private;
......@@ -2358,7 +2370,9 @@ static int pool_status(struct dm_target *ti, status_type_t type,
else
DMEMIT("rw ");
if (pool->pf.discard_enabled && pool->pf.discard_passdown)
if (!pool->pf.discard_enabled)
DMEMIT("ignore_discard");
else if (pool->pf.discard_passdown)
DMEMIT("discard_passdown");
else
DMEMIT("no_discard_passdown");
......@@ -2454,7 +2468,7 @@ static struct target_type pool_target = {
.name = "thin-pool",
.features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE |
DM_TARGET_IMMUTABLE,
.version = {1, 5, 0},
.version = {1, 6, 0},
.module = THIS_MODULE,
.ctr = pool_ctr,
.dtr = pool_dtr,
......@@ -2576,6 +2590,7 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv)
ti->num_flush_requests = 1;
ti->flush_supported = true;
ti->per_bio_data_size = sizeof(struct dm_thin_endio_hook);
/* In case the pool supports discards, pass them on. */
if (tc->pool->pf.discard_enabled) {
......@@ -2609,20 +2624,17 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv)
return r;
}
static int thin_map(struct dm_target *ti, struct bio *bio,
union map_info *map_context)
static int thin_map(struct dm_target *ti, struct bio *bio)
{
bio->bi_sector = dm_target_offset(ti, bio->bi_sector);
return thin_bio_map(ti, bio, map_context);
return thin_bio_map(ti, bio);
}
static int thin_endio(struct dm_target *ti,
struct bio *bio, int err,
union map_info *map_context)
static int thin_endio(struct dm_target *ti, struct bio *bio, int err)
{
unsigned long flags;
struct dm_thin_endio_hook *h = map_context->ptr;
struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
struct list_head work;
struct dm_thin_new_mapping *m, *tmp;
struct pool *pool = h->tc->pool;
......@@ -2643,13 +2655,14 @@ static int thin_endio(struct dm_target *ti,
if (h->all_io_entry) {
INIT_LIST_HEAD(&work);
dm_deferred_entry_dec(h->all_io_entry, &work);
if (!list_empty(&work)) {
spin_lock_irqsave(&pool->lock, flags);
list_for_each_entry_safe(m, tmp, &work, list)
list_add(&m->list, &pool->prepared_discards);
spin_unlock_irqrestore(&pool->lock, flags);
wake_worker(pool);
}
}
mempool_free(h, pool->endio_hook_pool);
return 0;
}
......@@ -2745,7 +2758,7 @@ static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits)
static struct target_type thin_target = {
.name = "thin",
.version = {1, 5, 0},
.version = {1, 6, 0},
.module = THIS_MODULE,
.ctr = thin_ctr,
.dtr = thin_dtr,
......@@ -2779,14 +2792,8 @@ static int __init dm_thin_init(void)
if (!_new_mapping_cache)
goto bad_new_mapping_cache;
_endio_hook_cache = KMEM_CACHE(dm_thin_endio_hook, 0);
if (!_endio_hook_cache)
goto bad_endio_hook_cache;
return 0;
bad_endio_hook_cache:
kmem_cache_destroy(_new_mapping_cache);
bad_new_mapping_cache:
dm_unregister_target(&pool_target);
bad_pool_target:
......@@ -2801,7 +2808,6 @@ static void dm_thin_exit(void)
dm_unregister_target(&pool_target);
kmem_cache_destroy(_new_mapping_cache);
kmem_cache_destroy(_endio_hook_cache);
}
module_init(dm_thin_init);
......
......@@ -55,7 +55,6 @@ struct dm_verity {
unsigned shash_descsize;/* the size of temporary space for crypto */
int hash_failed; /* set to 1 if hash of any block failed */
mempool_t *io_mempool; /* mempool of struct dm_verity_io */
mempool_t *vec_mempool; /* mempool of bio vector */
struct workqueue_struct *verify_wq;
......@@ -66,7 +65,6 @@ struct dm_verity {
struct dm_verity_io {
struct dm_verity *v;
struct bio *bio;
/* original values of bio->bi_end_io and bio->bi_private */
bio_end_io_t *orig_bi_end_io;
......@@ -389,8 +387,8 @@ static int verity_verify_io(struct dm_verity_io *io)
*/
static void verity_finish_io(struct dm_verity_io *io, int error)
{
struct bio *bio = io->bio;
struct dm_verity *v = io->v;
struct bio *bio = dm_bio_from_per_bio_data(io, v->ti->per_bio_data_size);
bio->bi_end_io = io->orig_bi_end_io;
bio->bi_private = io->orig_bi_private;
......@@ -398,8 +396,6 @@ static void verity_finish_io(struct dm_verity_io *io, int error)
if (io->io_vec != io->io_vec_inline)
mempool_free(io->io_vec, v->vec_mempool);
mempool_free(io, v->io_mempool);
bio_endio(bio, error);
}
......@@ -462,8 +458,7 @@ static void verity_prefetch_io(struct dm_verity *v, struct dm_verity_io *io)
* Bio map function. It allocates dm_verity_io structure and bio vector and
* fills them. Then it issues prefetches and the I/O.
*/
static int verity_map(struct dm_target *ti, struct bio *bio,
union map_info *map_context)
static int verity_map(struct dm_target *ti, struct bio *bio)
{
struct dm_verity *v = ti->private;
struct dm_verity_io *io;
......@@ -486,9 +481,8 @@ static int verity_map(struct dm_target *ti, struct bio *bio,
if (bio_data_dir(bio) == WRITE)
return -EIO;
io = mempool_alloc(v->io_mempool, GFP_NOIO);
io = dm_per_bio_data(bio, ti->per_bio_data_size);
io->v = v;
io->bio = bio;
io->orig_bi_end_io = bio->bi_end_io;
io->orig_bi_private = bio->bi_private;
io->block = bio->bi_sector >> (v->data_dev_block_bits - SECTOR_SHIFT);
......@@ -610,9 +604,6 @@ static void verity_dtr(struct dm_target *ti)
if (v->vec_mempool)
mempool_destroy(v->vec_mempool);
if (v->io_mempool)
mempool_destroy(v->io_mempool);
if (v->bufio)
dm_bufio_client_destroy(v->bufio);
......@@ -841,13 +832,7 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv)
goto bad;
}
v->io_mempool = mempool_create_kmalloc_pool(DM_VERITY_MEMPOOL_SIZE,
sizeof(struct dm_verity_io) + v->shash_descsize + v->digest_size * 2);
if (!v->io_mempool) {
ti->error = "Cannot allocate io mempool";
r = -ENOMEM;
goto bad;
}
ti->per_bio_data_size = roundup(sizeof(struct dm_verity_io) + v->shash_descsize + v->digest_size * 2, __alignof__(struct dm_verity_io));
v->vec_mempool = mempool_create_kmalloc_pool(DM_VERITY_MEMPOOL_SIZE,
BIO_MAX_PAGES * sizeof(struct bio_vec));
......@@ -875,7 +860,7 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv)
static struct target_type verity_target = {
.name = "verity",
.version = {1, 0, 0},
.version = {1, 1, 0},
.module = THIS_MODULE,
.ctr = verity_ctr,
.dtr = verity_dtr,
......
......@@ -33,8 +33,7 @@ static int zero_ctr(struct dm_target *ti, unsigned int argc, char **argv)
/*
* Return zeros only on reads
*/
static int zero_map(struct dm_target *ti, struct bio *bio,
union map_info *map_context)
static int zero_map(struct dm_target *ti, struct bio *bio)
{
switch(bio_rw(bio)) {
case READ:
......@@ -56,7 +55,7 @@ static int zero_map(struct dm_target *ti, struct bio *bio,
static struct target_type zero_target = {
.name = "zero",
.version = {1, 0, 0},
.version = {1, 1, 0},
.module = THIS_MODULE,
.ctr = zero_ctr,
.map = zero_map,
......
......@@ -62,18 +62,6 @@ struct dm_io {
spinlock_t endio_lock;
};
/*
* For bio-based dm.
* One of these is allocated per target within a bio. Hopefully
* this will be simplified out one day.
*/
struct dm_target_io {
struct dm_io *io;
struct dm_target *ti;
union map_info info;
struct bio clone;
};
/*
* For request-based dm.
* One of these is allocated per request.
......@@ -657,7 +645,7 @@ static void clone_endio(struct bio *bio, int error)
error = -EIO;
if (endio) {
r = endio(tio->ti, bio, error, &tio->info);
r = endio(tio->ti, bio, error);
if (r < 0 || r == DM_ENDIO_REQUEUE)
/*
* error and requeue request are handled
......@@ -1016,7 +1004,7 @@ static void __map_bio(struct dm_target *ti, struct dm_target_io *tio)
*/
atomic_inc(&tio->io->io_count);
sector = clone->bi_sector;
r = ti->type->map(ti, clone, &tio->info);
r = ti->type->map(ti, clone);
if (r == DM_MAPIO_REMAPPED) {
/* the bio has been remapped so dispatch it */
......@@ -1111,6 +1099,7 @@ static struct dm_target_io *alloc_tio(struct clone_info *ci,
tio->io = ci->io;
tio->ti = ti;
memset(&tio->info, 0, sizeof(tio->info));
tio->target_request_nr = 0;
return tio;
}
......@@ -1121,7 +1110,7 @@ static void __issue_target_request(struct clone_info *ci, struct dm_target *ti,
struct dm_target_io *tio = alloc_tio(ci, ti, ci->bio->bi_max_vecs);
struct bio *clone = &tio->clone;
tio->info.target_request_nr = request_nr;
tio->target_request_nr = request_nr;
/*
* Discard requests require the bio's inline iovecs be initialized.
......@@ -1174,7 +1163,28 @@ static void __clone_and_map_simple(struct clone_info *ci, struct dm_target *ti)
ci->sector_count = 0;
}
static int __clone_and_map_discard(struct clone_info *ci)
typedef unsigned (*get_num_requests_fn)(struct dm_target *ti);
static unsigned get_num_discard_requests(struct dm_target *ti)
{
return ti->num_discard_requests;
}
static unsigned get_num_write_same_requests(struct dm_target *ti)
{
return ti->num_write_same_requests;
}
typedef bool (*is_split_required_fn)(struct dm_target *ti);
static bool is_split_required_for_discard(struct dm_target *ti)
{
return ti->split_discard_requests;
}
static int __clone_and_map_changing_extent_only(struct clone_info *ci,
get_num_requests_fn get_num_requests,
is_split_required_fn is_split_required)
{
struct dm_target *ti;
sector_t len;
......@@ -1185,15 +1195,15 @@ static int __clone_and_map_discard(struct clone_info *ci)
return -EIO;
/*
* Even though the device advertised discard support,
* that does not mean every target supports it, and
* Even though the device advertised support for this type of
* request, that does not mean every target supports it, and
* reconfiguration might also have changed that since the
* check was performed.
*/
if (!ti->num_discard_requests)
if (!get_num_requests || !get_num_requests(ti))
return -EOPNOTSUPP;
if (!ti->split_discard_requests)
if (is_split_required && !is_split_required(ti))
len = min(ci->sector_count, max_io_len_target_boundary(ci->sector, ti));
else
len = min(ci->sector_count, max_io_len(ci->sector, ti));
......@@ -1206,6 +1216,17 @@ static int __clone_and_map_discard(struct clone_info *ci)
return 0;
}
static int __clone_and_map_discard(struct clone_info *ci)
{
return __clone_and_map_changing_extent_only(ci, get_num_discard_requests,
is_split_required_for_discard);
}
static int __clone_and_map_write_same(struct clone_info *ci)
{
return __clone_and_map_changing_extent_only(ci, get_num_write_same_requests, NULL);
}
static int __clone_and_map(struct clone_info *ci)
{
struct bio *bio = ci->bio;
......@@ -1215,6 +1236,8 @@ static int __clone_and_map(struct clone_info *ci)
if (unlikely(bio->bi_rw & REQ_DISCARD))
return __clone_and_map_discard(ci);
else if (unlikely(bio->bi_rw & REQ_WRITE_SAME))
return __clone_and_map_write_same(ci);
ti = dm_table_find_target(ci->map, ci->sector);
if (!dm_target_is_valid(ti))
......@@ -1946,13 +1969,20 @@ static void free_dev(struct mapped_device *md)
static void __bind_mempools(struct mapped_device *md, struct dm_table *t)
{
struct dm_md_mempools *p;
struct dm_md_mempools *p = dm_table_get_md_mempools(t);
if (md->io_pool && (md->tio_pool || dm_table_get_type(t) == DM_TYPE_BIO_BASED) && md->bs)
/* the md already has necessary mempools */
if (md->io_pool && (md->tio_pool || dm_table_get_type(t) == DM_TYPE_BIO_BASED) && md->bs) {
/*
* The md already has necessary mempools. Reload just the
* bioset because front_pad may have changed because
* a different table was loaded.
*/
bioset_free(md->bs);
md->bs = p->bs;
p->bs = NULL;
goto out;
}
p = dm_table_get_md_mempools(t);
BUG_ON(!p || md->io_pool || md->tio_pool || md->bs);
md->io_pool = p->io_pool;
......@@ -2711,7 +2741,7 @@ int dm_noflush_suspending(struct dm_target *ti)
}
EXPORT_SYMBOL_GPL(dm_noflush_suspending);
struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity)
struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity, unsigned per_bio_data_size)
{
struct dm_md_mempools *pools = kmalloc(sizeof(*pools), GFP_KERNEL);
unsigned int pool_size = (type == DM_TYPE_BIO_BASED) ? 16 : MIN_IOS;
......@@ -2719,6 +2749,8 @@ struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity)
if (!pools)
return NULL;
per_bio_data_size = roundup(per_bio_data_size, __alignof__(struct dm_target_io));
pools->io_pool = (type == DM_TYPE_BIO_BASED) ?
mempool_create_slab_pool(MIN_IOS, _io_cache) :
mempool_create_slab_pool(MIN_IOS, _rq_bio_info_cache);
......@@ -2734,7 +2766,7 @@ struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity)
pools->bs = (type == DM_TYPE_BIO_BASED) ?
bioset_create(pool_size,
offsetof(struct dm_target_io, clone)) :
per_bio_data_size + offsetof(struct dm_target_io, clone)) :
bioset_create(pool_size,
offsetof(struct dm_rq_clone_bio_info, clone));
if (!pools->bs)
......
......@@ -159,7 +159,7 @@ void dm_kcopyd_exit(void);
/*
* Mempool operations
*/
struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity);
struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity, unsigned per_bio_data_size);
void dm_free_md_mempools(struct dm_md_mempools *pools);
#endif
......@@ -428,15 +428,17 @@ static int dm_bm_validate_buffer(struct dm_block_manager *bm,
if (!v)
return 0;
r = v->check(v, (struct dm_block *) buf, dm_bufio_get_block_size(bm->bufio));
if (unlikely(r))
if (unlikely(r)) {
DMERR_LIMIT("%s validator check failed for block %llu", v->name,
(unsigned long long) dm_bufio_get_block_number(buf));
return r;
}
aux->validator = v;
} else {
if (unlikely(aux->validator != v)) {
DMERR("validator mismatch (old=%s vs new=%s) for block %llu",
DMERR_LIMIT("validator mismatch (old=%s vs new=%s) for block %llu",
aux->validator->name, v ? v->name : "NULL",
(unsigned long long)
dm_bufio_get_block_number(buf));
(unsigned long long) dm_bufio_get_block_number(buf));
return -EINVAL;
}
}
......
......@@ -36,13 +36,13 @@ struct node_header {
__le32 padding;
} __packed;
struct node {
struct btree_node {
struct node_header header;
__le64 keys[0];
} __packed;
void inc_children(struct dm_transaction_manager *tm, struct node *n,
void inc_children(struct dm_transaction_manager *tm, struct btree_node *n,
struct dm_btree_value_type *vt);
int new_block(struct dm_btree_info *info, struct dm_block **result);
......@@ -64,7 +64,7 @@ struct ro_spine {
void init_ro_spine(struct ro_spine *s, struct dm_btree_info *info);
int exit_ro_spine(struct ro_spine *s);
int ro_step(struct ro_spine *s, dm_block_t new_child);
struct node *ro_node(struct ro_spine *s);
struct btree_node *ro_node(struct ro_spine *s);
struct shadow_spine {
struct dm_btree_info *info;
......@@ -98,17 +98,17 @@ int shadow_root(struct shadow_spine *s);
/*
* Some inlines.
*/
static inline __le64 *key_ptr(struct node *n, uint32_t index)
static inline __le64 *key_ptr(struct btree_node *n, uint32_t index)
{
return n->keys + index;
}
static inline void *value_base(struct node *n)
static inline void *value_base(struct btree_node *n)
{
return &n->keys[le32_to_cpu(n->header.max_entries)];
}
static inline void *value_ptr(struct node *n, uint32_t index)
static inline void *value_ptr(struct btree_node *n, uint32_t index)
{
uint32_t value_size = le32_to_cpu(n->header.value_size);
return value_base(n) + (value_size * index);
......@@ -117,7 +117,7 @@ static inline void *value_ptr(struct node *n, uint32_t index)
/*
* Assumes the values are suitably-aligned and converts to core format.
*/
static inline uint64_t value64(struct node *n, uint32_t index)
static inline uint64_t value64(struct btree_node *n, uint32_t index)
{
__le64 *values_le = value_base(n);
......@@ -127,7 +127,7 @@ static inline uint64_t value64(struct node *n, uint32_t index)
/*
* Searching for a key within a single node.
*/
int lower_bound(struct node *n, uint64_t key);
int lower_bound(struct btree_node *n, uint64_t key);
extern struct dm_block_validator btree_node_validator;
......
......@@ -53,7 +53,7 @@
/*
* Some little utilities for moving node data around.
*/
static void node_shift(struct node *n, int shift)
static void node_shift(struct btree_node *n, int shift)
{
uint32_t nr_entries = le32_to_cpu(n->header.nr_entries);
uint32_t value_size = le32_to_cpu(n->header.value_size);
......@@ -79,7 +79,7 @@ static void node_shift(struct node *n, int shift)
}
}
static void node_copy(struct node *left, struct node *right, int shift)
static void node_copy(struct btree_node *left, struct btree_node *right, int shift)
{
uint32_t nr_left = le32_to_cpu(left->header.nr_entries);
uint32_t value_size = le32_to_cpu(left->header.value_size);
......@@ -108,7 +108,7 @@ static void node_copy(struct node *left, struct node *right, int shift)
/*
* Delete a specific entry from a leaf node.
*/
static void delete_at(struct node *n, unsigned index)
static void delete_at(struct btree_node *n, unsigned index)
{
unsigned nr_entries = le32_to_cpu(n->header.nr_entries);
unsigned nr_to_copy = nr_entries - (index + 1);
......@@ -128,7 +128,7 @@ static void delete_at(struct node *n, unsigned index)
n->header.nr_entries = cpu_to_le32(nr_entries - 1);
}
static unsigned merge_threshold(struct node *n)
static unsigned merge_threshold(struct btree_node *n)
{
return le32_to_cpu(n->header.max_entries) / 3;
}
......@@ -136,7 +136,7 @@ static unsigned merge_threshold(struct node *n)
struct child {
unsigned index;
struct dm_block *block;
struct node *n;
struct btree_node *n;
};
static struct dm_btree_value_type le64_type = {
......@@ -147,7 +147,7 @@ static struct dm_btree_value_type le64_type = {
.equal = NULL
};
static int init_child(struct dm_btree_info *info, struct node *parent,
static int init_child(struct dm_btree_info *info, struct btree_node *parent,
unsigned index, struct child *result)
{
int r, inc;
......@@ -177,7 +177,7 @@ static int exit_child(struct dm_btree_info *info, struct child *c)
return dm_tm_unlock(info->tm, c->block);
}
static void shift(struct node *left, struct node *right, int count)
static void shift(struct btree_node *left, struct btree_node *right, int count)
{
uint32_t nr_left = le32_to_cpu(left->header.nr_entries);
uint32_t nr_right = le32_to_cpu(right->header.nr_entries);
......@@ -203,11 +203,11 @@ static void shift(struct node *left, struct node *right, int count)
right->header.nr_entries = cpu_to_le32(nr_right + count);
}
static void __rebalance2(struct dm_btree_info *info, struct node *parent,
static void __rebalance2(struct dm_btree_info *info, struct btree_node *parent,
struct child *l, struct child *r)
{
struct node *left = l->n;
struct node *right = r->n;
struct btree_node *left = l->n;
struct btree_node *right = r->n;
uint32_t nr_left = le32_to_cpu(left->header.nr_entries);
uint32_t nr_right = le32_to_cpu(right->header.nr_entries);
unsigned threshold = 2 * merge_threshold(left) + 1;
......@@ -239,7 +239,7 @@ static int rebalance2(struct shadow_spine *s, struct dm_btree_info *info,
unsigned left_index)
{
int r;
struct node *parent;
struct btree_node *parent;
struct child left, right;
parent = dm_block_data(shadow_current(s));
......@@ -270,9 +270,9 @@ static int rebalance2(struct shadow_spine *s, struct dm_btree_info *info,
* in right, then rebalance2. This wastes some cpu, but I want something
* simple atm.
*/
static void delete_center_node(struct dm_btree_info *info, struct node *parent,
static void delete_center_node(struct dm_btree_info *info, struct btree_node *parent,
struct child *l, struct child *c, struct child *r,
struct node *left, struct node *center, struct node *right,
struct btree_node *left, struct btree_node *center, struct btree_node *right,
uint32_t nr_left, uint32_t nr_center, uint32_t nr_right)
{
uint32_t max_entries = le32_to_cpu(left->header.max_entries);
......@@ -301,9 +301,9 @@ static void delete_center_node(struct dm_btree_info *info, struct node *parent,
/*
* Redistributes entries among 3 sibling nodes.
*/
static void redistribute3(struct dm_btree_info *info, struct node *parent,
static void redistribute3(struct dm_btree_info *info, struct btree_node *parent,
struct child *l, struct child *c, struct child *r,
struct node *left, struct node *center, struct node *right,
struct btree_node *left, struct btree_node *center, struct btree_node *right,
uint32_t nr_left, uint32_t nr_center, uint32_t nr_right)
{
int s;
......@@ -343,12 +343,12 @@ static void redistribute3(struct dm_btree_info *info, struct node *parent,
*key_ptr(parent, r->index) = right->keys[0];
}
static void __rebalance3(struct dm_btree_info *info, struct node *parent,
static void __rebalance3(struct dm_btree_info *info, struct btree_node *parent,
struct child *l, struct child *c, struct child *r)
{
struct node *left = l->n;
struct node *center = c->n;
struct node *right = r->n;
struct btree_node *left = l->n;
struct btree_node *center = c->n;
struct btree_node *right = r->n;
uint32_t nr_left = le32_to_cpu(left->header.nr_entries);
uint32_t nr_center = le32_to_cpu(center->header.nr_entries);
......@@ -371,7 +371,7 @@ static int rebalance3(struct shadow_spine *s, struct dm_btree_info *info,
unsigned left_index)
{
int r;
struct node *parent = dm_block_data(shadow_current(s));
struct btree_node *parent = dm_block_data(shadow_current(s));
struct child left, center, right;
/*
......@@ -421,7 +421,7 @@ static int get_nr_entries(struct dm_transaction_manager *tm,
{
int r;
struct dm_block *block;
struct node *n;
struct btree_node *n;
r = dm_tm_read_lock(tm, b, &btree_node_validator, &block);
if (r)
......@@ -438,7 +438,7 @@ static int rebalance_children(struct shadow_spine *s,
{
int i, r, has_left_sibling, has_right_sibling;
uint32_t child_entries;
struct node *n;
struct btree_node *n;
n = dm_block_data(shadow_current(s));
......@@ -483,7 +483,7 @@ static int rebalance_children(struct shadow_spine *s,
return r;
}
static int do_leaf(struct node *n, uint64_t key, unsigned *index)
static int do_leaf(struct btree_node *n, uint64_t key, unsigned *index)
{
int i = lower_bound(n, key);
......@@ -506,7 +506,7 @@ static int remove_raw(struct shadow_spine *s, struct dm_btree_info *info,
uint64_t key, unsigned *index)
{
int i = *index, r;
struct node *n;
struct btree_node *n;
for (;;) {
r = shadow_step(s, root, vt);
......@@ -556,7 +556,7 @@ int dm_btree_remove(struct dm_btree_info *info, dm_block_t root,
unsigned level, last_level = info->levels - 1;
int index = 0, r = 0;
struct shadow_spine spine;
struct node *n;
struct btree_node *n;
init_shadow_spine(&spine, info);
for (level = 0; level < info->levels; level++) {
......
......@@ -23,7 +23,7 @@ static void node_prepare_for_write(struct dm_block_validator *v,
struct dm_block *b,
size_t block_size)
{
struct node *n = dm_block_data(b);
struct btree_node *n = dm_block_data(b);
struct node_header *h = &n->header;
h->blocknr = cpu_to_le64(dm_block_location(b));
......@@ -38,14 +38,14 @@ static int node_check(struct dm_block_validator *v,
struct dm_block *b,
size_t block_size)
{
struct node *n = dm_block_data(b);
struct btree_node *n = dm_block_data(b);
struct node_header *h = &n->header;
size_t value_size;
__le32 csum_disk;
uint32_t flags;
if (dm_block_location(b) != le64_to_cpu(h->blocknr)) {
DMERR("node_check failed blocknr %llu wanted %llu",
DMERR_LIMIT("node_check failed: blocknr %llu != wanted %llu",
le64_to_cpu(h->blocknr), dm_block_location(b));
return -ENOTBLK;
}
......@@ -54,7 +54,7 @@ static int node_check(struct dm_block_validator *v,
block_size - sizeof(__le32),
BTREE_CSUM_XOR));
if (csum_disk != h->csum) {
DMERR("node_check failed csum %u wanted %u",
DMERR_LIMIT("node_check failed: csum %u != wanted %u",
le32_to_cpu(csum_disk), le32_to_cpu(h->csum));
return -EILSEQ;
}
......@@ -63,12 +63,12 @@ static int node_check(struct dm_block_validator *v,
if (sizeof(struct node_header) +
(sizeof(__le64) + value_size) * le32_to_cpu(h->max_entries) > block_size) {
DMERR("node_check failed: max_entries too large");
DMERR_LIMIT("node_check failed: max_entries too large");
return -EILSEQ;
}
if (le32_to_cpu(h->nr_entries) > le32_to_cpu(h->max_entries)) {
DMERR("node_check failed, too many entries");
DMERR_LIMIT("node_check failed: too many entries");
return -EILSEQ;
}
......@@ -77,7 +77,7 @@ static int node_check(struct dm_block_validator *v,
*/
flags = le32_to_cpu(h->flags);
if (!(flags & INTERNAL_NODE) && !(flags & LEAF_NODE)) {
DMERR("node_check failed, node is neither INTERNAL or LEAF");
DMERR_LIMIT("node_check failed: node is neither INTERNAL or LEAF");
return -EILSEQ;
}
......@@ -164,7 +164,7 @@ int ro_step(struct ro_spine *s, dm_block_t new_child)
return r;
}
struct node *ro_node(struct ro_spine *s)
struct btree_node *ro_node(struct ro_spine *s)
{
struct dm_block *block;
......
......@@ -38,7 +38,7 @@ static void array_insert(void *base, size_t elt_size, unsigned nr_elts,
/*----------------------------------------------------------------*/
/* makes the assumption that no two keys are the same. */
static int bsearch(struct node *n, uint64_t key, int want_hi)
static int bsearch(struct btree_node *n, uint64_t key, int want_hi)
{
int lo = -1, hi = le32_to_cpu(n->header.nr_entries);
......@@ -58,12 +58,12 @@ static int bsearch(struct node *n, uint64_t key, int want_hi)
return want_hi ? hi : lo;
}
int lower_bound(struct node *n, uint64_t key)
int lower_bound(struct btree_node *n, uint64_t key)
{
return bsearch(n, key, 0);
}
void inc_children(struct dm_transaction_manager *tm, struct node *n,
void inc_children(struct dm_transaction_manager *tm, struct btree_node *n,
struct dm_btree_value_type *vt)
{
unsigned i;
......@@ -77,7 +77,7 @@ void inc_children(struct dm_transaction_manager *tm, struct node *n,
vt->inc(vt->context, value_ptr(n, i));
}
static int insert_at(size_t value_size, struct node *node, unsigned index,
static int insert_at(size_t value_size, struct btree_node *node, unsigned index,
uint64_t key, void *value)
__dm_written_to_disk(value)
{
......@@ -122,7 +122,7 @@ int dm_btree_empty(struct dm_btree_info *info, dm_block_t *root)
{
int r;
struct dm_block *b;
struct node *n;
struct btree_node *n;
size_t block_size;
uint32_t max_entries;
......@@ -154,7 +154,7 @@ EXPORT_SYMBOL_GPL(dm_btree_empty);
#define MAX_SPINE_DEPTH 64
struct frame {
struct dm_block *b;
struct node *n;
struct btree_node *n;
unsigned level;
unsigned nr_children;
unsigned current_child;
......@@ -230,6 +230,11 @@ static void pop_frame(struct del_stack *s)
dm_tm_unlock(s->tm, f->b);
}
static bool is_internal_level(struct dm_btree_info *info, struct frame *f)
{
return f->level < (info->levels - 1);
}
int dm_btree_del(struct dm_btree_info *info, dm_block_t root)
{
int r;
......@@ -241,7 +246,7 @@ int dm_btree_del(struct dm_btree_info *info, dm_block_t root)
s->tm = info->tm;
s->top = -1;
r = push_frame(s, root, 1);
r = push_frame(s, root, 0);
if (r)
goto out;
......@@ -267,7 +272,7 @@ int dm_btree_del(struct dm_btree_info *info, dm_block_t root)
if (r)
goto out;
} else if (f->level != (info->levels - 1)) {
} else if (is_internal_level(info, f)) {
b = value64(f->n, f->current_child);
f->current_child++;
r = push_frame(s, b, f->level + 1);
......@@ -295,7 +300,7 @@ EXPORT_SYMBOL_GPL(dm_btree_del);
/*----------------------------------------------------------------*/
static int btree_lookup_raw(struct ro_spine *s, dm_block_t block, uint64_t key,
int (*search_fn)(struct node *, uint64_t),
int (*search_fn)(struct btree_node *, uint64_t),
uint64_t *result_key, void *v, size_t value_size)
{
int i, r;
......@@ -406,7 +411,7 @@ static int btree_split_sibling(struct shadow_spine *s, dm_block_t root,
size_t size;
unsigned nr_left, nr_right;
struct dm_block *left, *right, *parent;
struct node *ln, *rn, *pn;
struct btree_node *ln, *rn, *pn;
__le64 location;
left = shadow_current(s);
......@@ -491,7 +496,7 @@ static int btree_split_beneath(struct shadow_spine *s, uint64_t key)
size_t size;
unsigned nr_left, nr_right;
struct dm_block *left, *right, *new_parent;
struct node *pn, *ln, *rn;
struct btree_node *pn, *ln, *rn;
__le64 val;
new_parent = shadow_current(s);
......@@ -576,7 +581,7 @@ static int btree_insert_raw(struct shadow_spine *s, dm_block_t root,
uint64_t key, unsigned *index)
{
int r, i = *index, top = 1;
struct node *node;
struct btree_node *node;
for (;;) {
r = shadow_step(s, root, vt);
......@@ -643,7 +648,7 @@ static int insert(struct dm_btree_info *info, dm_block_t root,
unsigned level, index = -1, last_level = info->levels - 1;
dm_block_t block = root;
struct shadow_spine spine;
struct node *n;
struct btree_node *n;
struct dm_btree_value_type le64_type;
le64_type.context = NULL;
......
......@@ -39,7 +39,7 @@ static int index_check(struct dm_block_validator *v,
__le32 csum_disk;
if (dm_block_location(b) != le64_to_cpu(mi_le->blocknr)) {
DMERR("index_check failed blocknr %llu wanted %llu",
DMERR_LIMIT("index_check failed: blocknr %llu != wanted %llu",
le64_to_cpu(mi_le->blocknr), dm_block_location(b));
return -ENOTBLK;
}
......@@ -48,7 +48,7 @@ static int index_check(struct dm_block_validator *v,
block_size - sizeof(__le32),
INDEX_CSUM_XOR));
if (csum_disk != mi_le->csum) {
DMERR("index_check failed csum %u wanted %u",
DMERR_LIMIT("index_check failed: csum %u != wanted %u",
le32_to_cpu(csum_disk), le32_to_cpu(mi_le->csum));
return -EILSEQ;
}
......@@ -89,7 +89,7 @@ static int bitmap_check(struct dm_block_validator *v,
__le32 csum_disk;
if (dm_block_location(b) != le64_to_cpu(disk_header->blocknr)) {
DMERR("bitmap check failed blocknr %llu wanted %llu",
DMERR_LIMIT("bitmap check failed: blocknr %llu != wanted %llu",
le64_to_cpu(disk_header->blocknr), dm_block_location(b));
return -ENOTBLK;
}
......@@ -98,7 +98,7 @@ static int bitmap_check(struct dm_block_validator *v,
block_size - sizeof(__le32),
BITMAP_CSUM_XOR));
if (csum_disk != disk_header->csum) {
DMERR("bitmap check failed csum %u wanted %u",
DMERR_LIMIT("bitmap check failed: csum %u != wanted %u",
le32_to_cpu(csum_disk), le32_to_cpu(disk_header->csum));
return -EILSEQ;
}
......
......@@ -337,7 +337,7 @@ static int sm_metadata_new_block(struct dm_space_map *sm, dm_block_t *b)
{
int r = sm_metadata_new_block_(sm, b);
if (r)
DMERR("out of metadata space");
DMERR("unable to allocate new metadata block");
return r;
}
......
......@@ -23,7 +23,6 @@ typedef enum { STATUSTYPE_INFO, STATUSTYPE_TABLE } status_type_t;
union map_info {
void *ptr;
unsigned long long ll;
unsigned target_request_nr;
};
/*
......@@ -46,8 +45,7 @@ typedef void (*dm_dtr_fn) (struct dm_target *ti);
* = 1: simple remap complete
* = 2: The target wants to push back the io
*/
typedef int (*dm_map_fn) (struct dm_target *ti, struct bio *bio,
union map_info *map_context);
typedef int (*dm_map_fn) (struct dm_target *ti, struct bio *bio);
typedef int (*dm_map_request_fn) (struct dm_target *ti, struct request *clone,
union map_info *map_context);
......@@ -60,8 +58,7 @@ typedef int (*dm_map_request_fn) (struct dm_target *ti, struct request *clone,
* 2 : The target wants to push back the io
*/
typedef int (*dm_endio_fn) (struct dm_target *ti,
struct bio *bio, int error,
union map_info *map_context);
struct bio *bio, int error);
typedef int (*dm_request_endio_fn) (struct dm_target *ti,
struct request *clone, int error,
union map_info *map_context);
......@@ -193,18 +190,30 @@ struct dm_target {
* A number of zero-length barrier requests that will be submitted
* to the target for the purpose of flushing cache.
*
* The request number will be placed in union map_info->target_request_nr.
* The request number can be accessed with dm_bio_get_target_request_nr.
* It is a responsibility of the target driver to remap these requests
* to the real underlying devices.
*/
unsigned num_flush_requests;
/*
* The number of discard requests that will be submitted to the
* target. map_info->request_nr is used just like num_flush_requests.
* The number of discard requests that will be submitted to the target.
* The request number can be accessed with dm_bio_get_target_request_nr.
*/
unsigned num_discard_requests;
/*
* The number of WRITE SAME requests that will be submitted to the target.
* The request number can be accessed with dm_bio_get_target_request_nr.
*/
unsigned num_write_same_requests;
/*
* The minimum number of extra bytes allocated in each bio for the
* target to use. dm_per_bio_data returns the data location.
*/
unsigned per_bio_data_size;
/* target specific data */
void *private;
......@@ -241,6 +250,36 @@ struct dm_target_callbacks {
int (*congested_fn) (struct dm_target_callbacks *, int);
};
/*
* For bio-based dm.
* One of these is allocated for each bio.
* This structure shouldn't be touched directly by target drivers.
* It is here so that we can inline dm_per_bio_data and
* dm_bio_from_per_bio_data
*/
struct dm_target_io {
struct dm_io *io;
struct dm_target *ti;
union map_info info;
unsigned target_request_nr;
struct bio clone;
};
static inline void *dm_per_bio_data(struct bio *bio, size_t data_size)
{
return (char *)bio - offsetof(struct dm_target_io, clone) - data_size;
}
static inline struct bio *dm_bio_from_per_bio_data(void *data, size_t data_size)
{
return (struct bio *)((char *)data + data_size + offsetof(struct dm_target_io, clone));
}
static inline unsigned dm_bio_get_target_request_nr(const struct bio *bio)
{
return container_of(bio, struct dm_target_io, clone)->target_request_nr;
}
int dm_register_target(struct target_type *t);
void dm_unregister_target(struct target_type *t);
......
......@@ -268,8 +268,8 @@ enum {
#define DM_VERSION_MAJOR 4
#define DM_VERSION_MINOR 23
#define DM_VERSION_PATCHLEVEL 0
#define DM_VERSION_EXTRA "-ioctl (2012-07-25)"
#define DM_VERSION_PATCHLEVEL 1
#define DM_VERSION_EXTRA "-ioctl (2012-12-18)"
/* Status bits */
#define DM_READONLY_FLAG (1 << 0) /* In/Out */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment