Commit 22608405 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'for-5.3/dm-changes' of...

Merge tag 'for-5.3/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm

Pull device mapper updates from Mike Snitzer:

 - Add encrypted byte-offset initialization vector (eboiv) to DM crypt.

 - Add optional discard features to DM snapshot which allow freeing
   space from a DM device whose free space was exhausted.

 - Various small improvements to use struct_size() and kzalloc().

 - Fix to check if DM thin metadata is in fail_io mode before attempting
   to update the superblock to set the needs_check flag. Otherwise the
   DM thin-pool can hang.

 - Fix DM bufio shrinker's potential for ABBA recursion deadlock with DM
   thin provisioning on loop usecase.

* tag 'for-5.3/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm:
  dm bufio: fix deadlock with loop device
  dm snapshot: add optional discard support features
  dm crypt: implement eboiv - encrypted byte-offset initialization vector
  dm crypt: remove obsolete comment about plumb IV
  dm crypt: wipe private IV struct after key invalid flag is set
  dm integrity: use kzalloc() instead of kmalloc() + memset()
  dm: update stale comment in end_clone_bio()
  dm log writes: fix incorrect comment about the logged sequence example
  dm log writes: use struct_size() to calculate size of pending_block
  dm crypt: use struct_size() when allocating encryption context
  dm integrity: always set version on superblock update
  dm thin metadata: check if in fail_io mode when setting needs_check
parents 92adeb61 bd293d07
......@@ -32,6 +32,7 @@ its visible content unchanged, at least until the <COW device> fills up.
- snapshot <origin> <COW device> <persistent?> <chunksize>
[<# feature args> [<arg>]*]
A snapshot of the <origin> block device is created. Changed chunks of
<chunksize> sectors will be stored on the <COW device>. Writes will
......@@ -54,8 +55,23 @@ When loading or unloading the snapshot target, the corresponding
snapshot-origin or snapshot-merge target must be suspended. A failure to
suspend the origin target could result in data corruption.
Optional features:
* snapshot-merge <origin> <COW device> <persistent> <chunksize>
discard_zeroes_cow - a discard issued to the snapshot device that
maps to entire chunks to will zero the corresponding exception(s) in
the snapshot's exception store.
discard_passdown_origin - a discard to the snapshot device is passed
down to the snapshot-origin's underlying device. This doesn't cause
copy-out to the snapshot exception store because the snapshot-origin
target is bypassed.
The discard_passdown_origin feature depends on the discard_zeroes_cow
feature being enabled.
- snapshot-merge <origin> <COW device> <persistent> <chunksize>
[<# feature args> [<arg>]*]
takes the same table arguments as the snapshot target except it only
works with persistent snapshots. This target assumes the role of the
......
......@@ -1599,9 +1599,7 @@ dm_bufio_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
unsigned long freed;
c = container_of(shrink, struct dm_bufio_client, shrinker);
if (sc->gfp_mask & __GFP_FS)
dm_bufio_lock(c);
else if (!dm_bufio_trylock(c))
if (!dm_bufio_trylock(c))
return SHRINK_STOP;
freed = __scan(c, sc->nr_to_scan, sc->gfp_mask);
......
......@@ -120,6 +120,10 @@ struct iv_tcw_private {
u8 *whitening;
};
struct iv_eboiv_private {
struct crypto_cipher *tfm;
};
/*
* Crypt: maps a linear range of a block device
* and encrypts / decrypts at the same time.
......@@ -159,6 +163,7 @@ struct crypt_config {
struct iv_benbi_private benbi;
struct iv_lmk_private lmk;
struct iv_tcw_private tcw;
struct iv_eboiv_private eboiv;
} iv_gen_private;
u64 iv_offset;
unsigned int iv_size;
......@@ -291,8 +296,9 @@ static struct crypto_aead *any_tfm_aead(struct crypt_config *cc)
* Note that this encryption scheme is vulnerable to watermarking attacks
* and should be used for old compatible containers access only.
*
* plumb: unimplemented, see:
* http://article.gmane.org/gmane.linux.kernel.device-mapper.dm-crypt/454
* eboiv: Encrypted byte-offset IV (used in Bitlocker in CBC mode)
* The IV is encrypted little-endian byte-offset (with the same key
* and cipher as the volume).
*/
static int crypt_iv_plain_gen(struct crypt_config *cc, u8 *iv,
......@@ -841,6 +847,67 @@ static int crypt_iv_random_gen(struct crypt_config *cc, u8 *iv,
return 0;
}
static void crypt_iv_eboiv_dtr(struct crypt_config *cc)
{
struct iv_eboiv_private *eboiv = &cc->iv_gen_private.eboiv;
crypto_free_cipher(eboiv->tfm);
eboiv->tfm = NULL;
}
static int crypt_iv_eboiv_ctr(struct crypt_config *cc, struct dm_target *ti,
const char *opts)
{
struct iv_eboiv_private *eboiv = &cc->iv_gen_private.eboiv;
struct crypto_cipher *tfm;
tfm = crypto_alloc_cipher(cc->cipher, 0, 0);
if (IS_ERR(tfm)) {
ti->error = "Error allocating crypto tfm for EBOIV";
return PTR_ERR(tfm);
}
if (crypto_cipher_blocksize(tfm) != cc->iv_size) {
ti->error = "Block size of EBOIV cipher does "
"not match IV size of block cipher";
crypto_free_cipher(tfm);
return -EINVAL;
}
eboiv->tfm = tfm;
return 0;
}
static int crypt_iv_eboiv_init(struct crypt_config *cc)
{
struct iv_eboiv_private *eboiv = &cc->iv_gen_private.eboiv;
int err;
err = crypto_cipher_setkey(eboiv->tfm, cc->key, cc->key_size);
if (err)
return err;
return 0;
}
static int crypt_iv_eboiv_wipe(struct crypt_config *cc)
{
/* Called after cc->key is set to random key in crypt_wipe() */
return crypt_iv_eboiv_init(cc);
}
static int crypt_iv_eboiv_gen(struct crypt_config *cc, u8 *iv,
struct dm_crypt_request *dmreq)
{
struct iv_eboiv_private *eboiv = &cc->iv_gen_private.eboiv;
memset(iv, 0, cc->iv_size);
*(__le64 *)iv = cpu_to_le64(dmreq->iv_sector * cc->sector_size);
crypto_cipher_encrypt_one(eboiv->tfm, iv, iv);
return 0;
}
static const struct crypt_iv_operations crypt_iv_plain_ops = {
.generator = crypt_iv_plain_gen
};
......@@ -893,6 +960,14 @@ static struct crypt_iv_operations crypt_iv_random_ops = {
.generator = crypt_iv_random_gen
};
static struct crypt_iv_operations crypt_iv_eboiv_ops = {
.ctr = crypt_iv_eboiv_ctr,
.dtr = crypt_iv_eboiv_dtr,
.init = crypt_iv_eboiv_init,
.wipe = crypt_iv_eboiv_wipe,
.generator = crypt_iv_eboiv_gen
};
/*
* Integrity extensions
*/
......@@ -2158,6 +2233,14 @@ static int crypt_wipe_key(struct crypt_config *cc)
clear_bit(DM_CRYPT_KEY_VALID, &cc->flags);
get_random_bytes(&cc->key, cc->key_size);
/* Wipe IV private keys */
if (cc->iv_gen_ops && cc->iv_gen_ops->wipe) {
r = cc->iv_gen_ops->wipe(cc);
if (r)
return r;
}
kzfree(cc->key_string);
cc->key_string = NULL;
r = crypt_setkey(cc);
......@@ -2288,6 +2371,8 @@ static int crypt_ctr_ivmode(struct dm_target *ti, const char *ivmode)
cc->iv_gen_ops = &crypt_iv_benbi_ops;
else if (strcmp(ivmode, "null") == 0)
cc->iv_gen_ops = &crypt_iv_null_ops;
else if (strcmp(ivmode, "eboiv") == 0)
cc->iv_gen_ops = &crypt_iv_eboiv_ops;
else if (strcmp(ivmode, "lmk") == 0) {
cc->iv_gen_ops = &crypt_iv_lmk_ops;
/*
......@@ -2699,7 +2784,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
return -EINVAL;
}
cc = kzalloc(sizeof(*cc) + key_size * sizeof(u8), GFP_KERNEL);
cc = kzalloc(struct_size(cc, key, key_size), GFP_KERNEL);
if (!cc) {
ti->error = "Cannot allocate encryption context";
return -ENOMEM;
......@@ -3050,14 +3135,8 @@ static int crypt_message(struct dm_target *ti, unsigned argc, char **argv,
memset(cc->key, 0, cc->key_size * sizeof(u8));
return ret;
}
if (argc == 2 && !strcasecmp(argv[1], "wipe")) {
if (cc->iv_gen_ops && cc->iv_gen_ops->wipe) {
ret = cc->iv_gen_ops->wipe(cc);
if (ret)
return ret;
}
if (argc == 2 && !strcasecmp(argv[1], "wipe"))
return crypt_wipe_key(cc);
}
}
error:
......@@ -3094,7 +3173,7 @@ static void crypt_io_hints(struct dm_target *ti, struct queue_limits *limits)
static struct target_type crypt_target = {
.name = "crypt",
.version = {1, 18, 1},
.version = {1, 19, 0},
.module = THIS_MODULE,
.ctr = crypt_ctr,
.dtr = crypt_dtr,
......
......@@ -476,6 +476,9 @@ static int sync_rw_sb(struct dm_integrity_c *ic, int op, int op_flags)
io_loc.sector = ic->start;
io_loc.count = SB_SECTORS;
if (op == REQ_OP_WRITE)
sb_set_version(ic);
return dm_io(&io_req, 1, &io_loc, NULL);
}
......@@ -2317,7 +2320,6 @@ static void recalc_write_super(struct dm_integrity_c *ic)
if (dm_integrity_failed(ic))
return;
sb_set_version(ic);
r = sync_rw_sb(ic, REQ_OP_WRITE, 0);
if (unlikely(r))
dm_integrity_io_error(ic, "writing superblock", r);
......@@ -3358,7 +3360,7 @@ static int create_journal(struct dm_integrity_c *ic, char **error)
goto bad;
}
crypt_iv = kmalloc(ivsize, GFP_KERNEL);
crypt_iv = kzalloc(ivsize, GFP_KERNEL);
if (!crypt_iv) {
*error = "Could not allocate iv";
r = -ENOMEM;
......@@ -3387,7 +3389,6 @@ static int create_journal(struct dm_integrity_c *ic, char **error)
sg_set_buf(&sg[i], va, PAGE_SIZE);
}
sg_set_buf(&sg[i], &ic->commit_ids, sizeof ic->commit_ids);
memset(crypt_iv, 0x00, ivsize);
skcipher_request_set_crypt(req, sg, sg,
PAGE_SIZE * ic->journal_pages + sizeof ic->commit_ids, crypt_iv);
......
......@@ -40,7 +40,7 @@
*
* Would result in the log looking like this:
*
* c,a,flush,fuad,b,<other writes>,<next flush>
* c,a,b,flush,fuad,<other writes>,<next flush>
*
* This is meant to help expose problems where file systems do not properly wait
* on data being written before invoking a FLUSH. FUA bypasses cache so once it
......@@ -699,7 +699,7 @@ static int log_writes_map(struct dm_target *ti, struct bio *bio)
if (discard_bio)
alloc_size = sizeof(struct pending_block);
else
alloc_size = sizeof(struct pending_block) + sizeof(struct bio_vec) * bio_segments(bio);
alloc_size = struct_size(block, vecs, bio_segments(bio));
block = kzalloc(alloc_size, GFP_NOIO);
if (!block) {
......
......@@ -115,7 +115,7 @@ static void end_clone_bio(struct bio *clone)
/*
* Update the original request.
* Do not use blk_end_request() here, because it may complete
* Do not use blk_mq_end_request() here, because it may complete
* the original request before the clone, and break the ordering.
*/
if (is_last)
......
/*
* dm-snapshot.c
*
* Copyright (C) 2001-2002 Sistina Software (UK) Limited.
*
* This file is released under the GPL.
......@@ -134,7 +132,10 @@ struct dm_snapshot {
* - I/O error while merging
* => stop merging; set merge_failed; process I/O normally.
*/
int merge_failed;
bool merge_failed:1;
bool discard_zeroes_cow:1;
bool discard_passdown_origin:1;
/*
* Incoming bios that overlap with chunks being merged must wait
......@@ -1173,12 +1174,64 @@ static void stop_merge(struct dm_snapshot *s)
clear_bit(SHUTDOWN_MERGE, &s->state_bits);
}
static int parse_snapshot_features(struct dm_arg_set *as, struct dm_snapshot *s,
struct dm_target *ti)
{
int r;
unsigned argc;
const char *arg_name;
static const struct dm_arg _args[] = {
{0, 2, "Invalid number of feature arguments"},
};
/*
* No feature arguments supplied.
*/
if (!as->argc)
return 0;
r = dm_read_arg_group(_args, as, &argc, &ti->error);
if (r)
return -EINVAL;
while (argc && !r) {
arg_name = dm_shift_arg(as);
argc--;
if (!strcasecmp(arg_name, "discard_zeroes_cow"))
s->discard_zeroes_cow = true;
else if (!strcasecmp(arg_name, "discard_passdown_origin"))
s->discard_passdown_origin = true;
else {
ti->error = "Unrecognised feature requested";
r = -EINVAL;
break;
}
}
if (!s->discard_zeroes_cow && s->discard_passdown_origin) {
/*
* TODO: really these are disjoint.. but ti->num_discard_bios
* and dm_bio_get_target_bio_nr() require rigid constraints.
*/
ti->error = "discard_passdown_origin feature depends on discard_zeroes_cow";
r = -EINVAL;
}
return r;
}
/*
* Construct a snapshot mapping: <origin_dev> <COW-dev> <p|po|n> <chunk-size>
* Construct a snapshot mapping:
* <origin_dev> <COW-dev> <p|po|n> <chunk-size> [<# feature args> [<arg>]*]
*/
static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{
struct dm_snapshot *s;
struct dm_arg_set as;
int i;
int r = -EINVAL;
char *origin_path, *cow_path;
......@@ -1186,8 +1239,8 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
unsigned args_used, num_flush_bios = 1;
fmode_t origin_mode = FMODE_READ;
if (argc != 4) {
ti->error = "requires exactly 4 arguments";
if (argc < 4) {
ti->error = "requires 4 or more arguments";
r = -EINVAL;
goto bad;
}
......@@ -1204,6 +1257,13 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
goto bad;
}
as.argc = argc;
as.argv = argv;
dm_consume_args(&as, 4);
r = parse_snapshot_features(&as, s, ti);
if (r)
goto bad_features;
origin_path = argv[0];
argv++;
argc--;
......@@ -1289,6 +1349,8 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
ti->private = s;
ti->num_flush_bios = num_flush_bios;
if (s->discard_zeroes_cow)
ti->num_discard_bios = (s->discard_passdown_origin ? 2 : 1);
ti->per_io_data_size = sizeof(struct dm_snap_tracked_chunk);
/* Add snapshot to the list of snapshots for this origin */
......@@ -1336,29 +1398,22 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
bad_read_metadata:
unregister_snapshot(s);
bad_load_and_register:
mempool_exit(&s->pending_pool);
bad_pending_pool:
dm_kcopyd_client_destroy(s->kcopyd_client);
bad_kcopyd:
dm_exception_table_exit(&s->pending, pending_cache);
dm_exception_table_exit(&s->complete, exception_cache);
bad_hash_tables:
dm_exception_store_destroy(s->store);
bad_store:
dm_put_device(ti, s->cow);
bad_cow:
dm_put_device(ti, s->origin);
bad_origin:
bad_features:
kfree(s);
bad:
return r;
}
......@@ -1806,6 +1861,37 @@ static void remap_exception(struct dm_snapshot *s, struct dm_exception *e,
(bio->bi_iter.bi_sector & s->store->chunk_mask);
}
static void zero_callback(int read_err, unsigned long write_err, void *context)
{
struct bio *bio = context;
struct dm_snapshot *s = bio->bi_private;
up(&s->cow_count);
bio->bi_status = write_err ? BLK_STS_IOERR : 0;
bio_endio(bio);
}
static void zero_exception(struct dm_snapshot *s, struct dm_exception *e,
struct bio *bio, chunk_t chunk)
{
struct dm_io_region dest;
dest.bdev = s->cow->bdev;
dest.sector = bio->bi_iter.bi_sector;
dest.count = s->store->chunk_size;
down(&s->cow_count);
WARN_ON_ONCE(bio->bi_private);
bio->bi_private = s;
dm_kcopyd_zero(s->kcopyd_client, 1, &dest, 0, zero_callback, bio);
}
static bool io_overlaps_chunk(struct dm_snapshot *s, struct bio *bio)
{
return bio->bi_iter.bi_size ==
(s->store->chunk_size << SECTOR_SHIFT);
}
static int snapshot_map(struct dm_target *ti, struct bio *bio)
{
struct dm_exception *e;
......@@ -1839,10 +1925,43 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio)
goto out_unlock;
}
if (unlikely(bio_op(bio) == REQ_OP_DISCARD)) {
if (s->discard_passdown_origin && dm_bio_get_target_bio_nr(bio)) {
/*
* passdown discard to origin (without triggering
* snapshot exceptions via do_origin; doing so would
* defeat the goal of freeing space in origin that is
* implied by the "discard_passdown_origin" feature)
*/
bio_set_dev(bio, s->origin->bdev);
track_chunk(s, bio, chunk);
goto out_unlock;
}
/* discard to snapshot (target_bio_nr == 0) zeroes exceptions */
}
/* If the block is already remapped - use that, else remap it */
e = dm_lookup_exception(&s->complete, chunk);
if (e) {
remap_exception(s, e, bio, chunk);
if (unlikely(bio_op(bio) == REQ_OP_DISCARD) &&
io_overlaps_chunk(s, bio)) {
dm_exception_table_unlock(&lock);
up_read(&s->lock);
zero_exception(s, e, bio, chunk);
r = DM_MAPIO_SUBMITTED; /* discard is not issued */
goto out;
}
goto out_unlock;
}
if (unlikely(bio_op(bio) == REQ_OP_DISCARD)) {
/*
* If no exception exists, complete discard immediately
* otherwise it'll trigger copy-out.
*/
bio_endio(bio);
r = DM_MAPIO_SUBMITTED;
goto out_unlock;
}
......@@ -1890,9 +2009,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio)
r = DM_MAPIO_SUBMITTED;
if (!pe->started &&
bio->bi_iter.bi_size ==
(s->store->chunk_size << SECTOR_SHIFT)) {
if (!pe->started && io_overlaps_chunk(s, bio)) {
pe->started = 1;
dm_exception_table_unlock(&lock);
......@@ -2138,6 +2255,7 @@ static void snapshot_status(struct dm_target *ti, status_type_t type,
{
unsigned sz = 0;
struct dm_snapshot *snap = ti->private;
unsigned num_features;
switch (type) {
case STATUSTYPE_INFO:
......@@ -2178,8 +2296,16 @@ static void snapshot_status(struct dm_target *ti, status_type_t type,
* make sense.
*/
DMEMIT("%s %s", snap->origin->name, snap->cow->name);
snap->store->type->status(snap->store, type, result + sz,
maxlen - sz);
sz += snap->store->type->status(snap->store, type, result + sz,
maxlen - sz);
num_features = snap->discard_zeroes_cow + snap->discard_passdown_origin;
if (num_features) {
DMEMIT(" %u", num_features);
if (snap->discard_zeroes_cow)
DMEMIT(" discard_zeroes_cow");
if (snap->discard_passdown_origin)
DMEMIT(" discard_passdown_origin");
}
break;
}
}
......@@ -2198,6 +2324,22 @@ static int snapshot_iterate_devices(struct dm_target *ti,
return r;
}
static void snapshot_io_hints(struct dm_target *ti, struct queue_limits *limits)
{
struct dm_snapshot *snap = ti->private;
if (snap->discard_zeroes_cow) {
struct dm_snapshot *snap_src = NULL, *snap_dest = NULL;
(void) __find_snapshots_sharing_cow(snap, &snap_src, &snap_dest, NULL);
if (snap_src && snap_dest)
snap = snap_src;
/* All discards are split on chunk_size boundary */
limits->discard_granularity = snap->store->chunk_size;
limits->max_discard_sectors = snap->store->chunk_size;
}
}
/*-----------------------------------------------------------------
* Origin methods
......@@ -2522,7 +2664,7 @@ static struct target_type origin_target = {
static struct target_type snapshot_target = {
.name = "snapshot",
.version = {1, 15, 0},
.version = {1, 16, 0},
.module = THIS_MODULE,
.ctr = snapshot_ctr,
.dtr = snapshot_dtr,
......@@ -2532,11 +2674,12 @@ static struct target_type snapshot_target = {
.resume = snapshot_resume,
.status = snapshot_status,
.iterate_devices = snapshot_iterate_devices,
.io_hints = snapshot_io_hints,
};
static struct target_type merge_target = {
.name = dm_snapshot_merge_target_name,
.version = {1, 4, 0},
.version = {1, 5, 0},
.module = THIS_MODULE,
.ctr = snapshot_ctr,
.dtr = snapshot_dtr,
......@@ -2547,6 +2690,7 @@ static struct target_type merge_target = {
.resume = snapshot_merge_resume,
.status = snapshot_status,
.iterate_devices = snapshot_iterate_devices,
.io_hints = snapshot_io_hints,
};
static int __init dm_snapshot_init(void)
......
......@@ -2046,16 +2046,19 @@ int dm_pool_register_metadata_threshold(struct dm_pool_metadata *pmd,
int dm_pool_metadata_set_needs_check(struct dm_pool_metadata *pmd)
{
int r;
int r = -EINVAL;
struct dm_block *sblock;
struct thin_disk_superblock *disk_super;
pmd_write_lock(pmd);
if (pmd->fail_io)
goto out;
pmd->flags |= THIN_METADATA_NEEDS_CHECK_FLAG;
r = superblock_lock(pmd, &sblock);
if (r) {
DMERR("couldn't read superblock");
DMERR("couldn't lock superblock");
goto out;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment