Commit b91593fa authored by Linus Torvalds

Merge tag 'for-4.15/dm' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm

Pull device mapper updates from Mike Snitzer:

 - a few conversions from atomic_t to refcount_t

 - a DM core fix for a race during device destruction that could result
   in a BUG_ON

 - a stable@ fix for a DM cache race condition that could lead to data
   corruption when operating in writeback mode (writethrough is default)

 - various DM cache cleanups and improvements

 - add DAX support to the DM log-writes target

 - a fix for the DM zoned target's ability to deal with the last zone of
   the drive being smaller than all others

 - a stable@ DM crypt and DM integrity fix for a negative check that was
   too restrictive (prevented slab debug with XFS on top of DM crypt from
   working)

 - a DM raid target fix for a panic that can occur when forcing a raid
   to sync

* tag 'for-4.15/dm' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm: (25 commits)
  dm cache: lift common migration preparation code to alloc_migration()
  dm cache: remove usused deferred_cells member from struct cache
  dm cache policy smq: allocate cache blocks in order
  dm cache policy smq: change max background work from 10240 to 4096 blocks
  dm cache background tracker: limit amount of background work that may be issued at once
  dm cache policy smq: take origin idle status into account when queuing writebacks
  dm cache policy smq: handle races with queuing background_work
  dm raid: fix panic when attempting to force a raid to sync
  dm integrity: allow unaligned bv_offset
  dm crypt: allow unaligned bv_offset
  dm: small cleanup in dm_get_md()
  dm: fix race between dm_get_from_kobject() and __dm_destroy()
  dm: allocate struct mapped_device with kvzalloc
  dm zoned: ignore last smaller runt zone
  dm space map metadata: use ARRAY_SIZE
  dm log writes: add support for DAX
  dm log writes: add support for inline data buffers
  dm cache: simplify get_per_bio_data() by removing data_size argument
  dm cache: remove all obsolete writethrough-specific code
  dm cache: submit writethrough writes in parallel to origin and cache
  ...
parents e2c5923c ef7afb36
...@@ -161,8 +161,17 @@ EXPORT_SYMBOL_GPL(btracker_nr_demotions_queued); ...@@ -161,8 +161,17 @@ EXPORT_SYMBOL_GPL(btracker_nr_demotions_queued);
static bool max_work_reached(struct background_tracker *b) static bool max_work_reached(struct background_tracker *b)
{ {
// FIXME: finish return atomic_read(&b->pending_promotes) +
return false; atomic_read(&b->pending_writebacks) +
atomic_read(&b->pending_demotes) >= b->max_work;
}
struct bt_work *alloc_work(struct background_tracker *b)
{
if (max_work_reached(b))
return NULL;
return kmem_cache_alloc(b->work_cache, GFP_NOWAIT);
} }
int btracker_queue(struct background_tracker *b, int btracker_queue(struct background_tracker *b,
...@@ -174,10 +183,7 @@ int btracker_queue(struct background_tracker *b, ...@@ -174,10 +183,7 @@ int btracker_queue(struct background_tracker *b,
if (pwork) if (pwork)
*pwork = NULL; *pwork = NULL;
if (max_work_reached(b)) w = alloc_work(b);
return -ENOMEM;
w = kmem_cache_alloc(b->work_cache, GFP_NOWAIT);
if (!w) if (!w)
return -ENOMEM; return -ENOMEM;
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
#include "persistent-data/dm-transaction-manager.h" #include "persistent-data/dm-transaction-manager.h"
#include <linux/device-mapper.h> #include <linux/device-mapper.h>
#include <linux/refcount.h>
/*----------------------------------------------------------------*/ /*----------------------------------------------------------------*/
...@@ -100,7 +101,7 @@ struct cache_disk_superblock { ...@@ -100,7 +101,7 @@ struct cache_disk_superblock {
} __packed; } __packed;
struct dm_cache_metadata { struct dm_cache_metadata {
atomic_t ref_count; refcount_t ref_count;
struct list_head list; struct list_head list;
unsigned version; unsigned version;
...@@ -753,7 +754,7 @@ static struct dm_cache_metadata *metadata_open(struct block_device *bdev, ...@@ -753,7 +754,7 @@ static struct dm_cache_metadata *metadata_open(struct block_device *bdev,
} }
cmd->version = metadata_version; cmd->version = metadata_version;
atomic_set(&cmd->ref_count, 1); refcount_set(&cmd->ref_count, 1);
init_rwsem(&cmd->root_lock); init_rwsem(&cmd->root_lock);
cmd->bdev = bdev; cmd->bdev = bdev;
cmd->data_block_size = data_block_size; cmd->data_block_size = data_block_size;
...@@ -791,7 +792,7 @@ static struct dm_cache_metadata *lookup(struct block_device *bdev) ...@@ -791,7 +792,7 @@ static struct dm_cache_metadata *lookup(struct block_device *bdev)
list_for_each_entry(cmd, &table, list) list_for_each_entry(cmd, &table, list)
if (cmd->bdev == bdev) { if (cmd->bdev == bdev) {
atomic_inc(&cmd->ref_count); refcount_inc(&cmd->ref_count);
return cmd; return cmd;
} }
...@@ -862,7 +863,7 @@ struct dm_cache_metadata *dm_cache_metadata_open(struct block_device *bdev, ...@@ -862,7 +863,7 @@ struct dm_cache_metadata *dm_cache_metadata_open(struct block_device *bdev,
void dm_cache_metadata_close(struct dm_cache_metadata *cmd) void dm_cache_metadata_close(struct dm_cache_metadata *cmd)
{ {
if (atomic_dec_and_test(&cmd->ref_count)) { if (refcount_dec_and_test(&cmd->ref_count)) {
mutex_lock(&table_lock); mutex_lock(&table_lock);
list_del(&cmd->list); list_del(&cmd->list);
mutex_unlock(&table_lock); mutex_unlock(&table_lock);
......
...@@ -213,6 +213,19 @@ static void l_del(struct entry_space *es, struct ilist *l, struct entry *e) ...@@ -213,6 +213,19 @@ static void l_del(struct entry_space *es, struct ilist *l, struct entry *e)
l->nr_elts--; l->nr_elts--;
} }
static struct entry *l_pop_head(struct entry_space *es, struct ilist *l)
{
struct entry *e;
for (e = l_head(es, l); e; e = l_next(es, e))
if (!e->sentinel) {
l_del(es, l, e);
return e;
}
return NULL;
}
static struct entry *l_pop_tail(struct entry_space *es, struct ilist *l) static struct entry *l_pop_tail(struct entry_space *es, struct ilist *l)
{ {
struct entry *e; struct entry *e;
...@@ -719,7 +732,7 @@ static struct entry *alloc_entry(struct entry_alloc *ea) ...@@ -719,7 +732,7 @@ static struct entry *alloc_entry(struct entry_alloc *ea)
if (l_empty(&ea->free)) if (l_empty(&ea->free))
return NULL; return NULL;
e = l_pop_tail(ea->es, &ea->free); e = l_pop_head(ea->es, &ea->free);
init_entry(e); init_entry(e);
ea->nr_allocated++; ea->nr_allocated++;
...@@ -1158,13 +1171,13 @@ static void clear_pending(struct smq_policy *mq, struct entry *e) ...@@ -1158,13 +1171,13 @@ static void clear_pending(struct smq_policy *mq, struct entry *e)
e->pending_work = false; e->pending_work = false;
} }
static void queue_writeback(struct smq_policy *mq) static void queue_writeback(struct smq_policy *mq, bool idle)
{ {
int r; int r;
struct policy_work work; struct policy_work work;
struct entry *e; struct entry *e;
e = q_peek(&mq->dirty, mq->dirty.nr_levels, !mq->migrations_allowed); e = q_peek(&mq->dirty, mq->dirty.nr_levels, idle);
if (e) { if (e) {
mark_pending(mq, e); mark_pending(mq, e);
q_del(&mq->dirty, e); q_del(&mq->dirty, e);
...@@ -1174,12 +1187,16 @@ static void queue_writeback(struct smq_policy *mq) ...@@ -1174,12 +1187,16 @@ static void queue_writeback(struct smq_policy *mq)
work.cblock = infer_cblock(mq, e); work.cblock = infer_cblock(mq, e);
r = btracker_queue(mq->bg_work, &work, NULL); r = btracker_queue(mq->bg_work, &work, NULL);
WARN_ON_ONCE(r); // FIXME: finish, I think we have to get rid of this race. if (r) {
clear_pending(mq, e);
q_push_front(&mq->dirty, e);
}
} }
} }
static void queue_demotion(struct smq_policy *mq) static void queue_demotion(struct smq_policy *mq)
{ {
int r;
struct policy_work work; struct policy_work work;
struct entry *e; struct entry *e;
...@@ -1189,7 +1206,7 @@ static void queue_demotion(struct smq_policy *mq) ...@@ -1189,7 +1206,7 @@ static void queue_demotion(struct smq_policy *mq)
e = q_peek(&mq->clean, mq->clean.nr_levels / 2, true); e = q_peek(&mq->clean, mq->clean.nr_levels / 2, true);
if (!e) { if (!e) {
if (!clean_target_met(mq, true)) if (!clean_target_met(mq, true))
queue_writeback(mq); queue_writeback(mq, false);
return; return;
} }
...@@ -1199,12 +1216,17 @@ static void queue_demotion(struct smq_policy *mq) ...@@ -1199,12 +1216,17 @@ static void queue_demotion(struct smq_policy *mq)
work.op = POLICY_DEMOTE; work.op = POLICY_DEMOTE;
work.oblock = e->oblock; work.oblock = e->oblock;
work.cblock = infer_cblock(mq, e); work.cblock = infer_cblock(mq, e);
btracker_queue(mq->bg_work, &work, NULL); r = btracker_queue(mq->bg_work, &work, NULL);
if (r) {
clear_pending(mq, e);
q_push_front(&mq->clean, e);
}
} }
static void queue_promotion(struct smq_policy *mq, dm_oblock_t oblock, static void queue_promotion(struct smq_policy *mq, dm_oblock_t oblock,
struct policy_work **workp) struct policy_work **workp)
{ {
int r;
struct entry *e; struct entry *e;
struct policy_work work; struct policy_work work;
...@@ -1234,7 +1256,9 @@ static void queue_promotion(struct smq_policy *mq, dm_oblock_t oblock, ...@@ -1234,7 +1256,9 @@ static void queue_promotion(struct smq_policy *mq, dm_oblock_t oblock,
work.op = POLICY_PROMOTE; work.op = POLICY_PROMOTE;
work.oblock = oblock; work.oblock = oblock;
work.cblock = infer_cblock(mq, e); work.cblock = infer_cblock(mq, e);
btracker_queue(mq->bg_work, &work, workp); r = btracker_queue(mq->bg_work, &work, workp);
if (r)
free_entry(&mq->cache_alloc, e);
} }
/*----------------------------------------------------------------*/ /*----------------------------------------------------------------*/
...@@ -1418,7 +1442,7 @@ static int smq_get_background_work(struct dm_cache_policy *p, bool idle, ...@@ -1418,7 +1442,7 @@ static int smq_get_background_work(struct dm_cache_policy *p, bool idle,
r = btracker_issue(mq->bg_work, result); r = btracker_issue(mq->bg_work, result);
if (r == -ENODATA) { if (r == -ENODATA) {
if (!clean_target_met(mq, idle)) { if (!clean_target_met(mq, idle)) {
queue_writeback(mq); queue_writeback(mq, idle);
r = btracker_issue(mq->bg_work, result); r = btracker_issue(mq->bg_work, result);
} }
} }
...@@ -1778,7 +1802,7 @@ static struct dm_cache_policy *__smq_create(dm_cblock_t cache_size, ...@@ -1778,7 +1802,7 @@ static struct dm_cache_policy *__smq_create(dm_cblock_t cache_size,
mq->next_hotspot_period = jiffies; mq->next_hotspot_period = jiffies;
mq->next_cache_period = jiffies; mq->next_cache_period = jiffies;
mq->bg_work = btracker_create(10240); /* FIXME: hard coded value */ mq->bg_work = btracker_create(4096); /* FIXME: hard coded value */
if (!mq->bg_work) if (!mq->bg_work)
goto bad_btracker; goto bad_btracker;
......
...@@ -408,9 +408,7 @@ struct cache { ...@@ -408,9 +408,7 @@ struct cache {
int sectors_per_block_shift; int sectors_per_block_shift;
spinlock_t lock; spinlock_t lock;
struct list_head deferred_cells;
struct bio_list deferred_bios; struct bio_list deferred_bios;
struct bio_list deferred_writethrough_bios;
sector_t migration_threshold; sector_t migration_threshold;
wait_queue_head_t migration_wait; wait_queue_head_t migration_wait;
atomic_t nr_allocated_migrations; atomic_t nr_allocated_migrations;
...@@ -446,10 +444,10 @@ struct cache { ...@@ -446,10 +444,10 @@ struct cache {
struct dm_kcopyd_client *copier; struct dm_kcopyd_client *copier;
struct workqueue_struct *wq; struct workqueue_struct *wq;
struct work_struct deferred_bio_worker; struct work_struct deferred_bio_worker;
struct work_struct deferred_writethrough_worker;
struct work_struct migration_worker; struct work_struct migration_worker;
struct delayed_work waker; struct delayed_work waker;
struct dm_bio_prison_v2 *prison; struct dm_bio_prison_v2 *prison;
struct bio_set *bs;
mempool_t *migration_pool; mempool_t *migration_pool;
...@@ -490,15 +488,6 @@ struct per_bio_data { ...@@ -490,15 +488,6 @@ struct per_bio_data {
struct dm_bio_prison_cell_v2 *cell; struct dm_bio_prison_cell_v2 *cell;
struct dm_hook_info hook_info; struct dm_hook_info hook_info;
sector_t len; sector_t len;
/*
* writethrough fields. These MUST remain at the end of this
* structure and the 'cache' member must be the first as it
* is used to determine the offset of the writethrough fields.
*/
struct cache *cache;
dm_cblock_t cblock;
struct dm_bio_details bio_details;
}; };
struct dm_cache_migration { struct dm_cache_migration {
...@@ -515,19 +504,19 @@ struct dm_cache_migration { ...@@ -515,19 +504,19 @@ struct dm_cache_migration {
/*----------------------------------------------------------------*/ /*----------------------------------------------------------------*/
static bool writethrough_mode(struct cache_features *f) static bool writethrough_mode(struct cache *cache)
{ {
return f->io_mode == CM_IO_WRITETHROUGH; return cache->features.io_mode == CM_IO_WRITETHROUGH;
} }
static bool writeback_mode(struct cache_features *f) static bool writeback_mode(struct cache *cache)
{ {
return f->io_mode == CM_IO_WRITEBACK; return cache->features.io_mode == CM_IO_WRITEBACK;
} }
static inline bool passthrough_mode(struct cache_features *f) static inline bool passthrough_mode(struct cache *cache)
{ {
return unlikely(f->io_mode == CM_IO_PASSTHROUGH); return unlikely(cache->features.io_mode == CM_IO_PASSTHROUGH);
} }
/*----------------------------------------------------------------*/ /*----------------------------------------------------------------*/
...@@ -537,14 +526,9 @@ static void wake_deferred_bio_worker(struct cache *cache) ...@@ -537,14 +526,9 @@ static void wake_deferred_bio_worker(struct cache *cache)
queue_work(cache->wq, &cache->deferred_bio_worker); queue_work(cache->wq, &cache->deferred_bio_worker);
} }
static void wake_deferred_writethrough_worker(struct cache *cache)
{
queue_work(cache->wq, &cache->deferred_writethrough_worker);
}
static void wake_migration_worker(struct cache *cache) static void wake_migration_worker(struct cache *cache)
{ {
if (passthrough_mode(&cache->features)) if (passthrough_mode(cache))
return; return;
queue_work(cache->wq, &cache->migration_worker); queue_work(cache->wq, &cache->migration_worker);
...@@ -567,10 +551,13 @@ static struct dm_cache_migration *alloc_migration(struct cache *cache) ...@@ -567,10 +551,13 @@ static struct dm_cache_migration *alloc_migration(struct cache *cache)
struct dm_cache_migration *mg; struct dm_cache_migration *mg;
mg = mempool_alloc(cache->migration_pool, GFP_NOWAIT); mg = mempool_alloc(cache->migration_pool, GFP_NOWAIT);
if (mg) { if (!mg)
mg->cache = cache; return NULL;
atomic_inc(&mg->cache->nr_allocated_migrations);
} memset(mg, 0, sizeof(*mg));
mg->cache = cache;
atomic_inc(&cache->nr_allocated_migrations);
return mg; return mg;
} }
...@@ -618,27 +605,16 @@ static unsigned lock_level(struct bio *bio) ...@@ -618,27 +605,16 @@ static unsigned lock_level(struct bio *bio)
* Per bio data * Per bio data
*--------------------------------------------------------------*/ *--------------------------------------------------------------*/
/* static struct per_bio_data *get_per_bio_data(struct bio *bio)
* If using writeback, leave out struct per_bio_data's writethrough fields.
*/
#define PB_DATA_SIZE_WB (offsetof(struct per_bio_data, cache))
#define PB_DATA_SIZE_WT (sizeof(struct per_bio_data))
static size_t get_per_bio_data_size(struct cache *cache)
{ {
return writethrough_mode(&cache->features) ? PB_DATA_SIZE_WT : PB_DATA_SIZE_WB; struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data));
}
static struct per_bio_data *get_per_bio_data(struct bio *bio, size_t data_size)
{
struct per_bio_data *pb = dm_per_bio_data(bio, data_size);
BUG_ON(!pb); BUG_ON(!pb);
return pb; return pb;
} }
static struct per_bio_data *init_per_bio_data(struct bio *bio, size_t data_size) static struct per_bio_data *init_per_bio_data(struct bio *bio)
{ {
struct per_bio_data *pb = get_per_bio_data(bio, data_size); struct per_bio_data *pb = get_per_bio_data(bio);
pb->tick = false; pb->tick = false;
pb->req_nr = dm_bio_get_target_bio_nr(bio); pb->req_nr = dm_bio_get_target_bio_nr(bio);
...@@ -678,7 +654,6 @@ static void defer_bios(struct cache *cache, struct bio_list *bios) ...@@ -678,7 +654,6 @@ static void defer_bios(struct cache *cache, struct bio_list *bios)
static bool bio_detain_shared(struct cache *cache, dm_oblock_t oblock, struct bio *bio) static bool bio_detain_shared(struct cache *cache, dm_oblock_t oblock, struct bio *bio)
{ {
bool r; bool r;
size_t pb_size;
struct per_bio_data *pb; struct per_bio_data *pb;
struct dm_cell_key_v2 key; struct dm_cell_key_v2 key;
dm_oblock_t end = to_oblock(from_oblock(oblock) + 1ULL); dm_oblock_t end = to_oblock(from_oblock(oblock) + 1ULL);
...@@ -703,8 +678,7 @@ static bool bio_detain_shared(struct cache *cache, dm_oblock_t oblock, struct bi ...@@ -703,8 +678,7 @@ static bool bio_detain_shared(struct cache *cache, dm_oblock_t oblock, struct bi
if (cell != cell_prealloc) if (cell != cell_prealloc)
free_prison_cell(cache, cell_prealloc); free_prison_cell(cache, cell_prealloc);
pb_size = get_per_bio_data_size(cache); pb = get_per_bio_data(bio);
pb = get_per_bio_data(bio, pb_size);
pb->cell = cell; pb->cell = cell;
return r; return r;
...@@ -856,28 +830,35 @@ static void remap_to_cache(struct cache *cache, struct bio *bio, ...@@ -856,28 +830,35 @@ static void remap_to_cache(struct cache *cache, struct bio *bio,
static void check_if_tick_bio_needed(struct cache *cache, struct bio *bio) static void check_if_tick_bio_needed(struct cache *cache, struct bio *bio)
{ {
unsigned long flags; unsigned long flags;
size_t pb_data_size = get_per_bio_data_size(cache); struct per_bio_data *pb;
struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
spin_lock_irqsave(&cache->lock, flags); spin_lock_irqsave(&cache->lock, flags);
if (cache->need_tick_bio && !op_is_flush(bio->bi_opf) && if (cache->need_tick_bio && !op_is_flush(bio->bi_opf) &&
bio_op(bio) != REQ_OP_DISCARD) { bio_op(bio) != REQ_OP_DISCARD) {
pb = get_per_bio_data(bio);
pb->tick = true; pb->tick = true;
cache->need_tick_bio = false; cache->need_tick_bio = false;
} }
spin_unlock_irqrestore(&cache->lock, flags); spin_unlock_irqrestore(&cache->lock, flags);
} }
static void remap_to_origin_clear_discard(struct cache *cache, struct bio *bio, static void __remap_to_origin_clear_discard(struct cache *cache, struct bio *bio,
dm_oblock_t oblock) dm_oblock_t oblock, bool bio_has_pbd)
{ {
// FIXME: this is called way too much. if (bio_has_pbd)
check_if_tick_bio_needed(cache, bio); check_if_tick_bio_needed(cache, bio);
remap_to_origin(cache, bio); remap_to_origin(cache, bio);
if (bio_data_dir(bio) == WRITE) if (bio_data_dir(bio) == WRITE)
clear_discard(cache, oblock_to_dblock(cache, oblock)); clear_discard(cache, oblock_to_dblock(cache, oblock));
} }
static void remap_to_origin_clear_discard(struct cache *cache, struct bio *bio,
dm_oblock_t oblock)
{
// FIXME: check_if_tick_bio_needed() is called way too much through this interface
__remap_to_origin_clear_discard(cache, bio, oblock, true);
}
static void remap_to_cache_dirty(struct cache *cache, struct bio *bio, static void remap_to_cache_dirty(struct cache *cache, struct bio *bio,
dm_oblock_t oblock, dm_cblock_t cblock) dm_oblock_t oblock, dm_cblock_t cblock)
{ {
...@@ -908,10 +889,10 @@ static bool accountable_bio(struct cache *cache, struct bio *bio) ...@@ -908,10 +889,10 @@ static bool accountable_bio(struct cache *cache, struct bio *bio)
static void accounted_begin(struct cache *cache, struct bio *bio) static void accounted_begin(struct cache *cache, struct bio *bio)
{ {
size_t pb_data_size = get_per_bio_data_size(cache); struct per_bio_data *pb;
struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
if (accountable_bio(cache, bio)) { if (accountable_bio(cache, bio)) {
pb = get_per_bio_data(bio);
pb->len = bio_sectors(bio); pb->len = bio_sectors(bio);
iot_io_begin(&cache->tracker, pb->len); iot_io_begin(&cache->tracker, pb->len);
} }
...@@ -919,8 +900,7 @@ static void accounted_begin(struct cache *cache, struct bio *bio) ...@@ -919,8 +900,7 @@ static void accounted_begin(struct cache *cache, struct bio *bio)
static void accounted_complete(struct cache *cache, struct bio *bio) static void accounted_complete(struct cache *cache, struct bio *bio)
{ {
size_t pb_data_size = get_per_bio_data_size(cache); struct per_bio_data *pb = get_per_bio_data(bio);
struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
iot_io_end(&cache->tracker, pb->len); iot_io_end(&cache->tracker, pb->len);
} }
...@@ -937,57 +917,26 @@ static void issue_op(struct bio *bio, void *context) ...@@ -937,57 +917,26 @@ static void issue_op(struct bio *bio, void *context)
accounted_request(cache, bio); accounted_request(cache, bio);
} }
static void defer_writethrough_bio(struct cache *cache, struct bio *bio)
{
unsigned long flags;
spin_lock_irqsave(&cache->lock, flags);
bio_list_add(&cache->deferred_writethrough_bios, bio);
spin_unlock_irqrestore(&cache->lock, flags);
wake_deferred_writethrough_worker(cache);
}
static void writethrough_endio(struct bio *bio)
{
struct per_bio_data *pb = get_per_bio_data(bio, PB_DATA_SIZE_WT);
dm_unhook_bio(&pb->hook_info, bio);
if (bio->bi_status) {
bio_endio(bio);
return;
}
dm_bio_restore(&pb->bio_details, bio);
remap_to_cache(pb->cache, bio, pb->cblock);
/*
* We can't issue this bio directly, since we're in interrupt
* context. So it gets put on a bio list for processing by the
* worker thread.
*/
defer_writethrough_bio(pb->cache, bio);
}
/* /*
* FIXME: send in parallel, huge latency as is.
* When running in writethrough mode we need to send writes to clean blocks * When running in writethrough mode we need to send writes to clean blocks
* to both the cache and origin devices. In future we'd like to clone the * to both the cache and origin devices. Clone the bio and send them in parallel.
* bio and send them in parallel, but for now we're doing them in
* series as this is easier.
*/ */
static void remap_to_origin_then_cache(struct cache *cache, struct bio *bio, static void remap_to_origin_and_cache(struct cache *cache, struct bio *bio,
dm_oblock_t oblock, dm_cblock_t cblock) dm_oblock_t oblock, dm_cblock_t cblock)
{ {
struct per_bio_data *pb = get_per_bio_data(bio, PB_DATA_SIZE_WT); struct bio *origin_bio = bio_clone_fast(bio, GFP_NOIO, cache->bs);
BUG_ON(!origin_bio);
pb->cache = cache; bio_chain(origin_bio, bio);
pb->cblock = cblock; /*
dm_hook_bio(&pb->hook_info, bio, writethrough_endio, NULL); * Passing false to __remap_to_origin_clear_discard() skips
dm_bio_record(&pb->bio_details, bio); * all code that might use per_bio_data (since clone doesn't have it)
*/
__remap_to_origin_clear_discard(cache, origin_bio, oblock, false);
submit_bio(origin_bio);
remap_to_origin_clear_discard(pb->cache, bio, oblock); remap_to_cache(cache, bio, cblock);
} }
/*---------------------------------------------------------------- /*----------------------------------------------------------------
...@@ -1201,6 +1150,18 @@ static void background_work_end(struct cache *cache) ...@@ -1201,6 +1150,18 @@ static void background_work_end(struct cache *cache)
/*----------------------------------------------------------------*/ /*----------------------------------------------------------------*/
static bool bio_writes_complete_block(struct cache *cache, struct bio *bio)
{
return (bio_data_dir(bio) == WRITE) &&
(bio->bi_iter.bi_size == (cache->sectors_per_block << SECTOR_SHIFT));
}
static bool optimisable_bio(struct cache *cache, struct bio *bio, dm_oblock_t block)
{
return writeback_mode(cache) &&
(is_discarded_oblock(cache, block) || bio_writes_complete_block(cache, bio));
}
static void quiesce(struct dm_cache_migration *mg, static void quiesce(struct dm_cache_migration *mg,
void (*continuation)(struct work_struct *)) void (*continuation)(struct work_struct *))
{ {
...@@ -1248,8 +1209,7 @@ static int copy(struct dm_cache_migration *mg, bool promote) ...@@ -1248,8 +1209,7 @@ static int copy(struct dm_cache_migration *mg, bool promote)
static void bio_drop_shared_lock(struct cache *cache, struct bio *bio) static void bio_drop_shared_lock(struct cache *cache, struct bio *bio)
{ {
size_t pb_data_size = get_per_bio_data_size(cache); struct per_bio_data *pb = get_per_bio_data(bio);
struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
if (pb->cell && dm_cell_put_v2(cache->prison, pb->cell)) if (pb->cell && dm_cell_put_v2(cache->prison, pb->cell))
free_prison_cell(cache, pb->cell); free_prison_cell(cache, pb->cell);
...@@ -1260,23 +1220,21 @@ static void overwrite_endio(struct bio *bio) ...@@ -1260,23 +1220,21 @@ static void overwrite_endio(struct bio *bio)
{ {
struct dm_cache_migration *mg = bio->bi_private; struct dm_cache_migration *mg = bio->bi_private;
struct cache *cache = mg->cache; struct cache *cache = mg->cache;
size_t pb_data_size = get_per_bio_data_size(cache); struct per_bio_data *pb = get_per_bio_data(bio);
struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
dm_unhook_bio(&pb->hook_info, bio); dm_unhook_bio(&pb->hook_info, bio);
if (bio->bi_status) if (bio->bi_status)
mg->k.input = bio->bi_status; mg->k.input = bio->bi_status;
queue_continuation(mg->cache->wq, &mg->k); queue_continuation(cache->wq, &mg->k);
} }
static void overwrite(struct dm_cache_migration *mg, static void overwrite(struct dm_cache_migration *mg,
void (*continuation)(struct work_struct *)) void (*continuation)(struct work_struct *))
{ {
struct bio *bio = mg->overwrite_bio; struct bio *bio = mg->overwrite_bio;
size_t pb_data_size = get_per_bio_data_size(mg->cache); struct per_bio_data *pb = get_per_bio_data(bio);
struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
dm_hook_bio(&pb->hook_info, bio, overwrite_endio, mg); dm_hook_bio(&pb->hook_info, bio, overwrite_endio, mg);
...@@ -1474,12 +1432,50 @@ static void mg_upgrade_lock(struct work_struct *ws) ...@@ -1474,12 +1432,50 @@ static void mg_upgrade_lock(struct work_struct *ws)
} }
} }
static void mg_full_copy(struct work_struct *ws)
{
struct dm_cache_migration *mg = ws_to_mg(ws);
struct cache *cache = mg->cache;
struct policy_work *op = mg->op;
bool is_policy_promote = (op->op == POLICY_PROMOTE);
if ((!is_policy_promote && !is_dirty(cache, op->cblock)) ||
is_discarded_oblock(cache, op->oblock)) {
mg_upgrade_lock(ws);
return;
}
init_continuation(&mg->k, mg_upgrade_lock);
if (copy(mg, is_policy_promote)) {
DMERR_LIMIT("%s: migration copy failed", cache_device_name(cache));
mg->k.input = BLK_STS_IOERR;
mg_complete(mg, false);
}
}
static void mg_copy(struct work_struct *ws) static void mg_copy(struct work_struct *ws)
{ {
int r;
struct dm_cache_migration *mg = ws_to_mg(ws); struct dm_cache_migration *mg = ws_to_mg(ws);
if (mg->overwrite_bio) { if (mg->overwrite_bio) {
/*
* No exclusive lock was held when we last checked if the bio
* was optimisable. So we have to check again in case things
* have changed (eg, the block may no longer be discarded).
*/
if (!optimisable_bio(mg->cache, mg->overwrite_bio, mg->op->oblock)) {
/*
* Fallback to a real full copy after doing some tidying up.
*/
bool rb = bio_detain_shared(mg->cache, mg->op->oblock, mg->overwrite_bio);
BUG_ON(rb); /* An exclusive lock must _not_ be held for this block */
mg->overwrite_bio = NULL;
inc_io_migrations(mg->cache);
mg_full_copy(ws);
return;
}
/* /*
* It's safe to do this here, even though it's new data * It's safe to do this here, even though it's new data
* because all IO has been locked out of the block. * because all IO has been locked out of the block.
...@@ -1489,26 +1485,8 @@ static void mg_copy(struct work_struct *ws) ...@@ -1489,26 +1485,8 @@ static void mg_copy(struct work_struct *ws)
*/ */
overwrite(mg, mg_update_metadata_after_copy); overwrite(mg, mg_update_metadata_after_copy);
} else { } else
struct cache *cache = mg->cache; mg_full_copy(ws);
struct policy_work *op = mg->op;
bool is_policy_promote = (op->op == POLICY_PROMOTE);
if ((!is_policy_promote && !is_dirty(cache, op->cblock)) ||
is_discarded_oblock(cache, op->oblock)) {
mg_upgrade_lock(ws);
return;
}
init_continuation(&mg->k, mg_upgrade_lock);
r = copy(mg, is_policy_promote);
if (r) {
DMERR_LIMIT("%s: migration copy failed", cache_device_name(cache));
mg->k.input = BLK_STS_IOERR;
mg_complete(mg, false);
}
}
} }
static int mg_lock_writes(struct dm_cache_migration *mg) static int mg_lock_writes(struct dm_cache_migration *mg)
...@@ -1567,9 +1545,6 @@ static int mg_start(struct cache *cache, struct policy_work *op, struct bio *bio ...@@ -1567,9 +1545,6 @@ static int mg_start(struct cache *cache, struct policy_work *op, struct bio *bio
return -ENOMEM; return -ENOMEM;
} }
memset(mg, 0, sizeof(*mg));
mg->cache = cache;
mg->op = op; mg->op = op;
mg->overwrite_bio = bio; mg->overwrite_bio = bio;
...@@ -1703,9 +1678,6 @@ static int invalidate_start(struct cache *cache, dm_cblock_t cblock, ...@@ -1703,9 +1678,6 @@ static int invalidate_start(struct cache *cache, dm_cblock_t cblock,
return -ENOMEM; return -ENOMEM;
} }
memset(mg, 0, sizeof(*mg));
mg->cache = cache;
mg->overwrite_bio = bio; mg->overwrite_bio = bio;
mg->invalidate_cblock = cblock; mg->invalidate_cblock = cblock;
mg->invalidate_oblock = oblock; mg->invalidate_oblock = oblock;
...@@ -1748,26 +1720,12 @@ static void inc_miss_counter(struct cache *cache, struct bio *bio) ...@@ -1748,26 +1720,12 @@ static void inc_miss_counter(struct cache *cache, struct bio *bio)
/*----------------------------------------------------------------*/ /*----------------------------------------------------------------*/
static bool bio_writes_complete_block(struct cache *cache, struct bio *bio)
{
return (bio_data_dir(bio) == WRITE) &&
(bio->bi_iter.bi_size == (cache->sectors_per_block << SECTOR_SHIFT));
}
static bool optimisable_bio(struct cache *cache, struct bio *bio, dm_oblock_t block)
{
return writeback_mode(&cache->features) &&
(is_discarded_oblock(cache, block) || bio_writes_complete_block(cache, bio));
}
static int map_bio(struct cache *cache, struct bio *bio, dm_oblock_t block, static int map_bio(struct cache *cache, struct bio *bio, dm_oblock_t block,
bool *commit_needed) bool *commit_needed)
{ {
int r, data_dir; int r, data_dir;
bool rb, background_queued; bool rb, background_queued;
dm_cblock_t cblock; dm_cblock_t cblock;
size_t pb_data_size = get_per_bio_data_size(cache);
struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
*commit_needed = false; *commit_needed = false;
...@@ -1816,6 +1774,8 @@ static int map_bio(struct cache *cache, struct bio *bio, dm_oblock_t block, ...@@ -1816,6 +1774,8 @@ static int map_bio(struct cache *cache, struct bio *bio, dm_oblock_t block,
} }
if (r == -ENOENT) { if (r == -ENOENT) {
struct per_bio_data *pb = get_per_bio_data(bio);
/* /*
* Miss. * Miss.
*/ */
...@@ -1823,7 +1783,6 @@ static int map_bio(struct cache *cache, struct bio *bio, dm_oblock_t block, ...@@ -1823,7 +1783,6 @@ static int map_bio(struct cache *cache, struct bio *bio, dm_oblock_t block,
if (pb->req_nr == 0) { if (pb->req_nr == 0) {
accounted_begin(cache, bio); accounted_begin(cache, bio);
remap_to_origin_clear_discard(cache, bio, block); remap_to_origin_clear_discard(cache, bio, block);
} else { } else {
/* /*
* This is a duplicate writethrough io that is no * This is a duplicate writethrough io that is no
...@@ -1842,18 +1801,17 @@ static int map_bio(struct cache *cache, struct bio *bio, dm_oblock_t block, ...@@ -1842,18 +1801,17 @@ static int map_bio(struct cache *cache, struct bio *bio, dm_oblock_t block,
* Passthrough always maps to the origin, invalidating any * Passthrough always maps to the origin, invalidating any
* cache blocks that are written to. * cache blocks that are written to.
*/ */
if (passthrough_mode(&cache->features)) { if (passthrough_mode(cache)) {
if (bio_data_dir(bio) == WRITE) { if (bio_data_dir(bio) == WRITE) {
bio_drop_shared_lock(cache, bio); bio_drop_shared_lock(cache, bio);
atomic_inc(&cache->stats.demotion); atomic_inc(&cache->stats.demotion);
invalidate_start(cache, cblock, block, bio); invalidate_start(cache, cblock, block, bio);
} else } else
remap_to_origin_clear_discard(cache, bio, block); remap_to_origin_clear_discard(cache, bio, block);
} else { } else {
if (bio_data_dir(bio) == WRITE && writethrough_mode(&cache->features) && if (bio_data_dir(bio) == WRITE && writethrough_mode(cache) &&
!is_dirty(cache, cblock)) { !is_dirty(cache, cblock)) {
remap_to_origin_then_cache(cache, bio, block, cblock); remap_to_origin_and_cache(cache, bio, block, cblock);
accounted_begin(cache, bio); accounted_begin(cache, bio);
} else } else
remap_to_cache_dirty(cache, bio, block, cblock); remap_to_cache_dirty(cache, bio, block, cblock);
...@@ -1922,8 +1880,7 @@ static blk_status_t commit_op(void *context) ...@@ -1922,8 +1880,7 @@ static blk_status_t commit_op(void *context)
static bool process_flush_bio(struct cache *cache, struct bio *bio) static bool process_flush_bio(struct cache *cache, struct bio *bio)
{ {
size_t pb_data_size = get_per_bio_data_size(cache); struct per_bio_data *pb = get_per_bio_data(bio);
struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
if (!pb->req_nr) if (!pb->req_nr)
remap_to_origin(cache, bio); remap_to_origin(cache, bio);
...@@ -1983,28 +1940,6 @@ static void process_deferred_bios(struct work_struct *ws) ...@@ -1983,28 +1940,6 @@ static void process_deferred_bios(struct work_struct *ws)
schedule_commit(&cache->committer); schedule_commit(&cache->committer);
} }
static void process_deferred_writethrough_bios(struct work_struct *ws)
{
struct cache *cache = container_of(ws, struct cache, deferred_writethrough_worker);
unsigned long flags;
struct bio_list bios;
struct bio *bio;
bio_list_init(&bios);
spin_lock_irqsave(&cache->lock, flags);
bio_list_merge(&bios, &cache->deferred_writethrough_bios);
bio_list_init(&cache->deferred_writethrough_bios);
spin_unlock_irqrestore(&cache->lock, flags);
/*
* These bios have already been through accounted_begin()
*/
while ((bio = bio_list_pop(&bios)))
generic_make_request(bio);
}
/*---------------------------------------------------------------- /*----------------------------------------------------------------
* Main worker loop * Main worker loop
*--------------------------------------------------------------*/ *--------------------------------------------------------------*/
...@@ -2112,6 +2047,9 @@ static void destroy(struct cache *cache) ...@@ -2112,6 +2047,9 @@ static void destroy(struct cache *cache)
kfree(cache->ctr_args[i]); kfree(cache->ctr_args[i]);
kfree(cache->ctr_args); kfree(cache->ctr_args);
if (cache->bs)
bioset_free(cache->bs);
kfree(cache); kfree(cache);
} }
...@@ -2555,8 +2493,15 @@ static int cache_create(struct cache_args *ca, struct cache **result) ...@@ -2555,8 +2493,15 @@ static int cache_create(struct cache_args *ca, struct cache **result)
ti->discards_supported = true; ti->discards_supported = true;
ti->split_discard_bios = false; ti->split_discard_bios = false;
ti->per_io_data_size = sizeof(struct per_bio_data);
cache->features = ca->features; cache->features = ca->features;
ti->per_io_data_size = get_per_bio_data_size(cache); if (writethrough_mode(cache)) {
/* Create bioset for writethrough bios issued to origin */
cache->bs = bioset_create(BIO_POOL_SIZE, 0, 0);
if (!cache->bs)
goto bad;
}
cache->callbacks.congested_fn = cache_is_congested; cache->callbacks.congested_fn = cache_is_congested;
dm_table_add_target_callbacks(ti->table, &cache->callbacks); dm_table_add_target_callbacks(ti->table, &cache->callbacks);
...@@ -2618,7 +2563,7 @@ static int cache_create(struct cache_args *ca, struct cache **result) ...@@ -2618,7 +2563,7 @@ static int cache_create(struct cache_args *ca, struct cache **result)
goto bad; goto bad;
} }
if (passthrough_mode(&cache->features)) { if (passthrough_mode(cache)) {
bool all_clean; bool all_clean;
r = dm_cache_metadata_all_clean(cache->cmd, &all_clean); r = dm_cache_metadata_all_clean(cache->cmd, &all_clean);
...@@ -2637,9 +2582,7 @@ static int cache_create(struct cache_args *ca, struct cache **result) ...@@ -2637,9 +2582,7 @@ static int cache_create(struct cache_args *ca, struct cache **result)
} }
spin_lock_init(&cache->lock); spin_lock_init(&cache->lock);
INIT_LIST_HEAD(&cache->deferred_cells);
bio_list_init(&cache->deferred_bios); bio_list_init(&cache->deferred_bios);
bio_list_init(&cache->deferred_writethrough_bios);
atomic_set(&cache->nr_allocated_migrations, 0); atomic_set(&cache->nr_allocated_migrations, 0);
atomic_set(&cache->nr_io_migrations, 0); atomic_set(&cache->nr_io_migrations, 0);
init_waitqueue_head(&cache->migration_wait); init_waitqueue_head(&cache->migration_wait);
...@@ -2678,8 +2621,6 @@ static int cache_create(struct cache_args *ca, struct cache **result) ...@@ -2678,8 +2621,6 @@ static int cache_create(struct cache_args *ca, struct cache **result)
goto bad; goto bad;
} }
INIT_WORK(&cache->deferred_bio_worker, process_deferred_bios); INIT_WORK(&cache->deferred_bio_worker, process_deferred_bios);
INIT_WORK(&cache->deferred_writethrough_worker,
process_deferred_writethrough_bios);
INIT_WORK(&cache->migration_worker, check_migrations); INIT_WORK(&cache->migration_worker, check_migrations);
INIT_DELAYED_WORK(&cache->waker, do_waker); INIT_DELAYED_WORK(&cache->waker, do_waker);
...@@ -2795,9 +2736,8 @@ static int cache_map(struct dm_target *ti, struct bio *bio) ...@@ -2795,9 +2736,8 @@ static int cache_map(struct dm_target *ti, struct bio *bio)
int r; int r;
bool commit_needed; bool commit_needed;
dm_oblock_t block = get_bio_block(cache, bio); dm_oblock_t block = get_bio_block(cache, bio);
size_t pb_data_size = get_per_bio_data_size(cache);
init_per_bio_data(bio, pb_data_size); init_per_bio_data(bio);
if (unlikely(from_oblock(block) >= from_oblock(cache->origin_blocks))) { if (unlikely(from_oblock(block) >= from_oblock(cache->origin_blocks))) {
/* /*
* This can only occur if the io goes to a partial block at * This can only occur if the io goes to a partial block at
...@@ -2821,13 +2761,11 @@ static int cache_map(struct dm_target *ti, struct bio *bio) ...@@ -2821,13 +2761,11 @@ static int cache_map(struct dm_target *ti, struct bio *bio)
return r; return r;
} }
static int cache_end_io(struct dm_target *ti, struct bio *bio, static int cache_end_io(struct dm_target *ti, struct bio *bio, blk_status_t *error)
blk_status_t *error)
{ {
struct cache *cache = ti->private; struct cache *cache = ti->private;
unsigned long flags; unsigned long flags;
size_t pb_data_size = get_per_bio_data_size(cache); struct per_bio_data *pb = get_per_bio_data(bio);
struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
if (pb->tick) { if (pb->tick) {
policy_tick(cache->policy, false); policy_tick(cache->policy, false);
...@@ -3243,13 +3181,13 @@ static void cache_status(struct dm_target *ti, status_type_t type, ...@@ -3243,13 +3181,13 @@ static void cache_status(struct dm_target *ti, status_type_t type,
else else
DMEMIT("1 "); DMEMIT("1 ");
if (writethrough_mode(&cache->features)) if (writethrough_mode(cache))
DMEMIT("writethrough "); DMEMIT("writethrough ");
else if (passthrough_mode(&cache->features)) else if (passthrough_mode(cache))
DMEMIT("passthrough "); DMEMIT("passthrough ");
else if (writeback_mode(&cache->features)) else if (writeback_mode(cache))
DMEMIT("writeback "); DMEMIT("writeback ");
else { else {
...@@ -3415,7 +3353,7 @@ static int process_invalidate_cblocks_message(struct cache *cache, unsigned coun ...@@ -3415,7 +3353,7 @@ static int process_invalidate_cblocks_message(struct cache *cache, unsigned coun
unsigned i; unsigned i;
struct cblock_range range; struct cblock_range range;
if (!passthrough_mode(&cache->features)) { if (!passthrough_mode(cache)) {
DMERR("%s: cache has to be in passthrough mode for invalidation", DMERR("%s: cache has to be in passthrough mode for invalidation",
cache_device_name(cache)); cache_device_name(cache));
return -EPERM; return -EPERM;
......
...@@ -29,7 +29,6 @@ struct dm_kobject_holder { ...@@ -29,7 +29,6 @@ struct dm_kobject_holder {
* DM targets must _not_ dereference a mapped_device to directly access its members! * DM targets must _not_ dereference a mapped_device to directly access its members!
*/ */
struct mapped_device { struct mapped_device {
struct srcu_struct io_barrier;
struct mutex suspend_lock; struct mutex suspend_lock;
/* /*
...@@ -127,6 +126,8 @@ struct mapped_device { ...@@ -127,6 +126,8 @@ struct mapped_device {
struct blk_mq_tag_set *tag_set; struct blk_mq_tag_set *tag_set;
bool use_blk_mq:1; bool use_blk_mq:1;
bool init_tio_pdu:1; bool init_tio_pdu:1;
struct srcu_struct io_barrier;
}; };
void dm_init_md_queue(struct mapped_device *md); void dm_init_md_queue(struct mapped_device *md);
......
...@@ -1075,7 +1075,7 @@ static int crypt_convert_block_aead(struct crypt_config *cc, ...@@ -1075,7 +1075,7 @@ static int crypt_convert_block_aead(struct crypt_config *cc,
BUG_ON(cc->integrity_iv_size && cc->integrity_iv_size != cc->iv_size); BUG_ON(cc->integrity_iv_size && cc->integrity_iv_size != cc->iv_size);
/* Reject unexpected unaligned bio. */ /* Reject unexpected unaligned bio. */
if (unlikely(bv_in.bv_offset & (cc->sector_size - 1))) if (unlikely(bv_in.bv_len & (cc->sector_size - 1)))
return -EIO; return -EIO;
dmreq = dmreq_of_req(cc, req); dmreq = dmreq_of_req(cc, req);
...@@ -1168,7 +1168,7 @@ static int crypt_convert_block_skcipher(struct crypt_config *cc, ...@@ -1168,7 +1168,7 @@ static int crypt_convert_block_skcipher(struct crypt_config *cc,
int r = 0; int r = 0;
/* Reject unexpected unaligned bio. */ /* Reject unexpected unaligned bio. */
if (unlikely(bv_in.bv_offset & (cc->sector_size - 1))) if (unlikely(bv_in.bv_len & (cc->sector_size - 1)))
return -EIO; return -EIO;
dmreq = dmreq_of_req(cc, req); dmreq = dmreq_of_req(cc, req);
......
...@@ -1377,7 +1377,7 @@ static int dm_integrity_map(struct dm_target *ti, struct bio *bio) ...@@ -1377,7 +1377,7 @@ static int dm_integrity_map(struct dm_target *ti, struct bio *bio)
struct bvec_iter iter; struct bvec_iter iter;
struct bio_vec bv; struct bio_vec bv;
bio_for_each_segment(bv, bio, iter) { bio_for_each_segment(bv, bio, iter) {
if (unlikely((bv.bv_offset | bv.bv_len) & ((ic->sectors_per_block << SECTOR_SHIFT) - 1))) { if (unlikely(bv.bv_len & ((ic->sectors_per_block << SECTOR_SHIFT) - 1))) {
DMERR("Bio vector (%u,%u) is not aligned on %u-sector boundary", DMERR("Bio vector (%u,%u) is not aligned on %u-sector boundary",
bv.bv_offset, bv.bv_len, ic->sectors_per_block); bv.bv_offset, bv.bv_len, ic->sectors_per_block);
return DM_MAPIO_KILL; return DM_MAPIO_KILL;
......
...@@ -10,9 +10,11 @@ ...@@ -10,9 +10,11 @@
#include <linux/init.h> #include <linux/init.h>
#include <linux/blkdev.h> #include <linux/blkdev.h>
#include <linux/bio.h> #include <linux/bio.h>
#include <linux/dax.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/kthread.h> #include <linux/kthread.h>
#include <linux/freezer.h> #include <linux/freezer.h>
#include <linux/uio.h>
#define DM_MSG_PREFIX "log-writes" #define DM_MSG_PREFIX "log-writes"
...@@ -246,27 +248,108 @@ static int write_metadata(struct log_writes_c *lc, void *entry, ...@@ -246,27 +248,108 @@ static int write_metadata(struct log_writes_c *lc, void *entry,
return -1; return -1;
} }
/*
 * Write @datalen bytes from @data to the log device, starting at @sector.
 *
 * The buffer is copied page by page into newly allocated pages, each padded
 * with zeroes up to a multiple of lc->sectorsize, and submitted in bios of
 * at most BIO_MAX_PAGES pages. lc->io_blocks is incremented once per bio;
 * on a failure the increment for the bio being built is undone with
 * put_io_block().
 *
 * @entry and @entrylen are accepted but not used here.
 *
 * Returns 0 once every bio has been submitted, -1 on allocation or
 * bio_add_page() failure.
 */
static int write_inline_data(struct log_writes_c *lc, void *entry,
			     size_t entrylen, void *data, size_t datalen,
			     sector_t sector)
{
	struct bio *bio;

	while (datalen) {
		int pages_left = ALIGN(datalen, PAGE_SIZE) >> PAGE_SHIFT;
		int nr_vecs = min(pages_left, BIO_MAX_PAGES);
		int idx;

		atomic_inc(&lc->io_blocks);

		bio = bio_alloc(GFP_KERNEL, nr_vecs);
		if (!bio) {
			DMERR("Couldn't alloc inline data bio");
			goto error;
		}

		bio->bi_iter.bi_size = 0;
		bio->bi_iter.bi_sector = sector;
		bio_set_dev(bio, lc->logdev->bdev);
		bio->bi_end_io = log_end_io;
		bio->bi_private = lc;
		bio_set_op_attrs(bio, REQ_OP_WRITE, 0);

		for (idx = 0; idx < nr_vecs; idx++) {
			int chunk = min_t(int, datalen, PAGE_SIZE);
			int padded = ALIGN(chunk, lc->sectorsize);
			struct page *page = alloc_page(GFP_KERNEL);
			size_t added;
			void *kaddr;

			if (!page) {
				DMERR("Couldn't alloc inline data page");
				goto error_bio;
			}

			/* Copy the payload and zero the padding up to the sector boundary. */
			kaddr = kmap_atomic(page);
			memcpy(kaddr, data, chunk);
			if (padded > chunk)
				memset(kaddr + chunk, 0, padded - chunk);
			kunmap_atomic(kaddr);

			added = bio_add_page(bio, page, padded, 0);
			if (added != padded) {
				DMERR("Couldn't add page of inline data");
				/* This page never made it into the bio, so free it here. */
				__free_page(page);
				goto error_bio;
			}

			data += chunk;
			datalen -= chunk;
		}

		submit_bio(bio);

		sector += nr_vecs * PAGE_SECTORS;
	}

	return 0;

error_bio:
	bio_free_pages(bio);
	bio_put(bio);
error:
	put_io_block(lc);
	return -1;
}
static int log_one_block(struct log_writes_c *lc, static int log_one_block(struct log_writes_c *lc,
struct pending_block *block, sector_t sector) struct pending_block *block, sector_t sector)
{ {
struct bio *bio; struct bio *bio;
struct log_write_entry entry; struct log_write_entry entry;
size_t ret; size_t metadatalen, ret;
int i; int i;
entry.sector = cpu_to_le64(block->sector); entry.sector = cpu_to_le64(block->sector);
entry.nr_sectors = cpu_to_le64(block->nr_sectors); entry.nr_sectors = cpu_to_le64(block->nr_sectors);
entry.flags = cpu_to_le64(block->flags); entry.flags = cpu_to_le64(block->flags);
entry.data_len = cpu_to_le64(block->datalen); entry.data_len = cpu_to_le64(block->datalen);
metadatalen = (block->flags & LOG_MARK_FLAG) ? block->datalen : 0;
if (write_metadata(lc, &entry, sizeof(entry), block->data, if (write_metadata(lc, &entry, sizeof(entry), block->data,
block->datalen, sector)) { metadatalen, sector)) {
free_pending_block(lc, block); free_pending_block(lc, block);
return -1; return -1;
} }
sector += dev_to_bio_sectors(lc, 1);
if (block->datalen && metadatalen == 0) {
if (write_inline_data(lc, &entry, sizeof(entry), block->data,
block->datalen, sector)) {
free_pending_block(lc, block);
return -1;
}
/* we don't support both inline data & bio data */
goto out;
}
if (!block->vec_cnt) if (!block->vec_cnt)
goto out; goto out;
sector += dev_to_bio_sectors(lc, 1);
atomic_inc(&lc->io_blocks); atomic_inc(&lc->io_blocks);
bio = bio_alloc(GFP_KERNEL, min(block->vec_cnt, BIO_MAX_PAGES)); bio = bio_alloc(GFP_KERNEL, min(block->vec_cnt, BIO_MAX_PAGES));
...@@ -527,6 +610,51 @@ static int log_mark(struct log_writes_c *lc, char *data) ...@@ -527,6 +610,51 @@ static int log_mark(struct log_writes_c *lc, char *data)
return 0; return 0;
} }
/*
 * Capture a DAX write so that it can be recorded in the log.
 *
 * Copies @bytes bytes out of the iterator @i into a newly allocated
 * pending_block, rewinds the iterator so the caller can consume the same
 * data again, appends the block to lc->unflushed_blocks and wakes
 * lc->log_kthread.
 *
 * Returns 0 on success (including @bytes == 0, where nothing is queued),
 * -ENOMEM if an allocation fails, or -EIO if the full @bytes could not be
 * copied from the iterator. On every return path the iterator is left at
 * the position it had on entry.
 */
static int log_dax(struct log_writes_c *lc, sector_t sector, size_t bytes,
		   struct iov_iter *i)
{
	struct pending_block *block;
	size_t copied;

	if (!bytes)
		return 0;

	block = kzalloc(sizeof(*block), GFP_KERNEL);
	if (!block) {
		DMERR("Error allocating dax pending block");
		return -ENOMEM;
	}

	block->data = kzalloc(bytes, GFP_KERNEL);
	if (!block->data) {
		DMERR("Error allocating dax data space");
		kfree(block);
		return -ENOMEM;
	}

	/* write data provided via the iterator */
	copied = copy_from_iter(block->data, bytes, i);

	/*
	 * rewind the iterator so that the block driver can use it; revert
	 * only what was actually consumed, which may be less than @bytes
	 */
	iov_iter_revert(i, copied);

	/* A short copy must not be logged as if it were a complete write. */
	if (copied != bytes) {
		DMERR("Error copying dax data");
		kfree(block->data);
		kfree(block);
		return -EIO;
	}

	block->datalen = bytes;
	block->sector = bio_to_dev_sectors(lc, sector);
	block->nr_sectors = ALIGN(bytes, lc->sectorsize) >> lc->sectorshift;

	atomic_inc(&lc->pending_blocks);
	spin_lock_irq(&lc->blocks_lock);
	list_add_tail(&block->list, &lc->unflushed_blocks);
	spin_unlock_irq(&lc->blocks_lock);
	wake_up_process(lc->log_kthread);

	return 0;
}
static void log_writes_dtr(struct dm_target *ti) static void log_writes_dtr(struct dm_target *ti)
{ {
struct log_writes_c *lc = ti->private; struct log_writes_c *lc = ti->private;
...@@ -792,9 +920,46 @@ static void log_writes_io_hints(struct dm_target *ti, struct queue_limits *limit ...@@ -792,9 +920,46 @@ static void log_writes_io_hints(struct dm_target *ti, struct queue_limits *limit
limits->io_min = limits->physical_block_size; limits->io_min = limits->physical_block_size;
} }
/*
 * direct_access hook for the log-writes target.
 *
 * Converts @pgoff to a sector, lets bdev_dax_pgoff() translate it for
 * lc->dev->bdev (updating @pgoff in place), and forwards the request to
 * dax_direct_access() on lc->dev->dax_dev.
 *
 * Returns the bdev_dax_pgoff() error if that call fails, otherwise the
 * result of dax_direct_access().
 */
static long log_writes_dax_direct_access(struct dm_target *ti, pgoff_t pgoff,
					 long nr_pages, void **kaddr, pfn_t *pfn)
{
	struct log_writes_c *lc = ti->private;
	sector_t start = pgoff * PAGE_SECTORS;
	int err = bdev_dax_pgoff(lc->dev->bdev, start, nr_pages * PAGE_SIZE, &pgoff);

	return err ? err : dax_direct_access(lc->dev->dax_dev, pgoff, nr_pages, kaddr, pfn);
}
/*
 * dax_copy_from_iter hook for the log-writes target.
 *
 * When logging is enabled the write is first handed to log_dax(); the data
 * is then copied to the underlying DAX device with dax_copy_from_iter().
 *
 * Returns 0 if bdev_dax_pgoff() or log_dax() fails, otherwise whatever
 * dax_copy_from_iter() returns.
 */
static size_t log_writes_dax_copy_from_iter(struct dm_target *ti,
					    pgoff_t pgoff, void *addr, size_t bytes,
					    struct iov_iter *i)
{
	struct log_writes_c *lc = ti->private;
	/* Taken before bdev_dax_pgoff() rewrites pgoff below. */
	sector_t start = pgoff * PAGE_SECTORS;

	if (bdev_dax_pgoff(lc->dev->bdev, start, ALIGN(bytes, PAGE_SIZE), &pgoff))
		return 0;

	/* Don't bother doing anything if logging has been disabled */
	if (lc->logging_enabled) {
		int rc = log_dax(lc, start, bytes, i);

		if (rc) {
			DMWARN("Error %d logging DAX write", rc);
			return 0;
		}
	}

	return dax_copy_from_iter(lc->dev->dax_dev, pgoff, addr, bytes, i);
}
static struct target_type log_writes_target = { static struct target_type log_writes_target = {
.name = "log-writes", .name = "log-writes",
.version = {1, 0, 0}, .version = {1, 1, 0},
.module = THIS_MODULE, .module = THIS_MODULE,
.ctr = log_writes_ctr, .ctr = log_writes_ctr,
.dtr = log_writes_dtr, .dtr = log_writes_dtr,
...@@ -805,6 +970,8 @@ static struct target_type log_writes_target = { ...@@ -805,6 +970,8 @@ static struct target_type log_writes_target = {
.message = log_writes_message, .message = log_writes_message,
.iterate_devices = log_writes_iterate_devices, .iterate_devices = log_writes_iterate_devices,
.io_hints = log_writes_io_hints, .io_hints = log_writes_io_hints,
.direct_access = log_writes_dax_direct_access,
.dax_copy_from_iter = log_writes_dax_copy_from_iter,
}; };
static int __init dm_log_writes_init(void) static int __init dm_log_writes_init(void)
......
...@@ -2143,13 +2143,6 @@ static int super_load(struct md_rdev *rdev, struct md_rdev *refdev) ...@@ -2143,13 +2143,6 @@ static int super_load(struct md_rdev *rdev, struct md_rdev *refdev)
struct dm_raid_superblock *refsb; struct dm_raid_superblock *refsb;
uint64_t events_sb, events_refsb; uint64_t events_sb, events_refsb;
rdev->sb_start = 0;
rdev->sb_size = bdev_logical_block_size(rdev->meta_bdev);
if (rdev->sb_size < sizeof(*sb) || rdev->sb_size > PAGE_SIZE) {
DMERR("superblock size of a logical block is no longer valid");
return -EINVAL;
}
r = read_disk_sb(rdev, rdev->sb_size, false); r = read_disk_sb(rdev, rdev->sb_size, false);
if (r) if (r)
return r; return r;
...@@ -2494,6 +2487,17 @@ static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs) ...@@ -2494,6 +2487,17 @@ static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs)
if (test_bit(Journal, &rdev->flags)) if (test_bit(Journal, &rdev->flags))
continue; continue;
if (!rdev->meta_bdev)
continue;
/* Set superblock offset/size for metadata device. */
rdev->sb_start = 0;
rdev->sb_size = bdev_logical_block_size(rdev->meta_bdev);
if (rdev->sb_size < sizeof(struct dm_raid_superblock) || rdev->sb_size > PAGE_SIZE) {
DMERR("superblock size of a logical block is no longer valid");
return -EINVAL;
}
/* /*
* Skipping super_load due to CTR_FLAG_SYNC will cause * Skipping super_load due to CTR_FLAG_SYNC will cause
* the array to undergo initialization again as * the array to undergo initialization again as
...@@ -2506,9 +2510,6 @@ static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs) ...@@ -2506,9 +2510,6 @@ static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs)
if (test_bit(__CTR_FLAG_SYNC, &rs->ctr_flags)) if (test_bit(__CTR_FLAG_SYNC, &rs->ctr_flags))
continue; continue;
if (!rdev->meta_bdev)
continue;
r = super_load(rdev, freshest); r = super_load(rdev, freshest);
switch (r) { switch (r) {
......
...@@ -451,15 +451,15 @@ int dm_get_device(struct dm_target *ti, const char *path, fmode_t mode, ...@@ -451,15 +451,15 @@ int dm_get_device(struct dm_target *ti, const char *path, fmode_t mode,
return r; return r;
} }
atomic_set(&dd->count, 0); refcount_set(&dd->count, 1);
list_add(&dd->list, &t->devices); list_add(&dd->list, &t->devices);
} else if (dd->dm_dev->mode != (mode | dd->dm_dev->mode)) { } else if (dd->dm_dev->mode != (mode | dd->dm_dev->mode)) {
r = upgrade_mode(dd, mode, t->md); r = upgrade_mode(dd, mode, t->md);
if (r) if (r)
return r; return r;
refcount_inc(&dd->count);
} }
atomic_inc(&dd->count);
*result = dd->dm_dev; *result = dd->dm_dev;
return 0; return 0;
...@@ -515,7 +515,7 @@ void dm_put_device(struct dm_target *ti, struct dm_dev *d) ...@@ -515,7 +515,7 @@ void dm_put_device(struct dm_target *ti, struct dm_dev *d)
dm_device_name(ti->table->md), d->name); dm_device_name(ti->table->md), d->name);
return; return;
} }
if (atomic_dec_and_test(&dd->count)) { if (refcount_dec_and_test(&dd->count)) {
dm_put_table_device(ti->table->md, d); dm_put_table_device(ti->table->md, d);
list_del(&dd->list); list_del(&dd->list);
kfree(dd); kfree(dd);
......
...@@ -660,6 +660,7 @@ static int dmz_get_zoned_device(struct dm_target *ti, char *path) ...@@ -660,6 +660,7 @@ static int dmz_get_zoned_device(struct dm_target *ti, char *path)
struct dmz_target *dmz = ti->private; struct dmz_target *dmz = ti->private;
struct request_queue *q; struct request_queue *q;
struct dmz_dev *dev; struct dmz_dev *dev;
sector_t aligned_capacity;
int ret; int ret;
/* Get the target device */ /* Get the target device */
...@@ -685,15 +686,17 @@ static int dmz_get_zoned_device(struct dm_target *ti, char *path) ...@@ -685,15 +686,17 @@ static int dmz_get_zoned_device(struct dm_target *ti, char *path)
goto err; goto err;
} }
q = bdev_get_queue(dev->bdev);
dev->capacity = i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT; dev->capacity = i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT;
if (ti->begin || (ti->len != dev->capacity)) { aligned_capacity = dev->capacity & ~(blk_queue_zone_sectors(q) - 1);
if (ti->begin ||
((ti->len != dev->capacity) && (ti->len != aligned_capacity))) {
ti->error = "Partial mapping not supported"; ti->error = "Partial mapping not supported";
ret = -EINVAL; ret = -EINVAL;
goto err; goto err;
} }
q = bdev_get_queue(dev->bdev); dev->zone_nr_sectors = blk_queue_zone_sectors(q);
dev->zone_nr_sectors = q->limits.chunk_sectors;
dev->zone_nr_sectors_shift = ilog2(dev->zone_nr_sectors); dev->zone_nr_sectors_shift = ilog2(dev->zone_nr_sectors);
dev->zone_nr_blocks = dmz_sect2blk(dev->zone_nr_sectors); dev->zone_nr_blocks = dmz_sect2blk(dev->zone_nr_sectors);
...@@ -929,8 +932,10 @@ static int dmz_iterate_devices(struct dm_target *ti, ...@@ -929,8 +932,10 @@ static int dmz_iterate_devices(struct dm_target *ti,
iterate_devices_callout_fn fn, void *data) iterate_devices_callout_fn fn, void *data)
{ {
struct dmz_target *dmz = ti->private; struct dmz_target *dmz = ti->private;
struct dmz_dev *dev = dmz->dev;
sector_t capacity = dev->capacity & ~(dev->zone_nr_sectors - 1);
return fn(ti, dmz->ddev, 0, dmz->dev->capacity, data); return fn(ti, dmz->ddev, 0, capacity, data);
} }
static struct target_type dmz_type = { static struct target_type dmz_type = {
......
...@@ -24,6 +24,7 @@ ...@@ -24,6 +24,7 @@
#include <linux/delay.h> #include <linux/delay.h>
#include <linux/wait.h> #include <linux/wait.h>
#include <linux/pr.h> #include <linux/pr.h>
#include <linux/refcount.h>
#define DM_MSG_PREFIX "core" #define DM_MSG_PREFIX "core"
...@@ -98,7 +99,7 @@ struct dm_md_mempools { ...@@ -98,7 +99,7 @@ struct dm_md_mempools {
struct table_device { struct table_device {
struct list_head list; struct list_head list;
atomic_t count; refcount_t count;
struct dm_dev dm_dev; struct dm_dev dm_dev;
}; };
...@@ -685,10 +686,11 @@ int dm_get_table_device(struct mapped_device *md, dev_t dev, fmode_t mode, ...@@ -685,10 +686,11 @@ int dm_get_table_device(struct mapped_device *md, dev_t dev, fmode_t mode,
format_dev_t(td->dm_dev.name, dev); format_dev_t(td->dm_dev.name, dev);
atomic_set(&td->count, 0); refcount_set(&td->count, 1);
list_add(&td->list, &md->table_devices); list_add(&td->list, &md->table_devices);
} else {
refcount_inc(&td->count);
} }
atomic_inc(&td->count);
mutex_unlock(&md->table_devices_lock); mutex_unlock(&md->table_devices_lock);
*result = &td->dm_dev; *result = &td->dm_dev;
...@@ -701,7 +703,7 @@ void dm_put_table_device(struct mapped_device *md, struct dm_dev *d) ...@@ -701,7 +703,7 @@ void dm_put_table_device(struct mapped_device *md, struct dm_dev *d)
struct table_device *td = container_of(d, struct table_device, dm_dev); struct table_device *td = container_of(d, struct table_device, dm_dev);
mutex_lock(&md->table_devices_lock); mutex_lock(&md->table_devices_lock);
if (atomic_dec_and_test(&td->count)) { if (refcount_dec_and_test(&td->count)) {
close_table_device(td, md); close_table_device(td, md);
list_del(&td->list); list_del(&td->list);
kfree(td); kfree(td);
...@@ -718,7 +720,7 @@ static void free_table_devices(struct list_head *devices) ...@@ -718,7 +720,7 @@ static void free_table_devices(struct list_head *devices)
struct table_device *td = list_entry(tmp, struct table_device, list); struct table_device *td = list_entry(tmp, struct table_device, list);
DMWARN("dm_destroy: %s still exists with %d references", DMWARN("dm_destroy: %s still exists with %d references",
td->dm_dev.name, atomic_read(&td->count)); td->dm_dev.name, refcount_read(&td->count));
kfree(td); kfree(td);
} }
} }
...@@ -1684,7 +1686,7 @@ static struct mapped_device *alloc_dev(int minor) ...@@ -1684,7 +1686,7 @@ static struct mapped_device *alloc_dev(int minor)
struct mapped_device *md; struct mapped_device *md;
void *old_md; void *old_md;
md = kzalloc_node(sizeof(*md), GFP_KERNEL, numa_node_id); md = kvzalloc_node(sizeof(*md), GFP_KERNEL, numa_node_id);
if (!md) { if (!md) {
DMWARN("unable to allocate device, out of memory."); DMWARN("unable to allocate device, out of memory.");
return NULL; return NULL;
...@@ -1784,7 +1786,7 @@ static struct mapped_device *alloc_dev(int minor) ...@@ -1784,7 +1786,7 @@ static struct mapped_device *alloc_dev(int minor)
bad_minor: bad_minor:
module_put(THIS_MODULE); module_put(THIS_MODULE);
bad_module_get: bad_module_get:
kfree(md); kvfree(md);
return NULL; return NULL;
} }
...@@ -1803,7 +1805,7 @@ static void free_dev(struct mapped_device *md) ...@@ -1803,7 +1805,7 @@ static void free_dev(struct mapped_device *md)
free_minor(minor); free_minor(minor);
module_put(THIS_MODULE); module_put(THIS_MODULE);
kfree(md); kvfree(md);
} }
static void __bind_mempools(struct mapped_device *md, struct dm_table *t) static void __bind_mempools(struct mapped_device *md, struct dm_table *t)
...@@ -2061,17 +2063,12 @@ struct mapped_device *dm_get_md(dev_t dev) ...@@ -2061,17 +2063,12 @@ struct mapped_device *dm_get_md(dev_t dev)
spin_lock(&_minor_lock); spin_lock(&_minor_lock);
md = idr_find(&_minor_idr, minor); md = idr_find(&_minor_idr, minor);
if (md) { if (!md || md == MINOR_ALLOCED || (MINOR(disk_devt(dm_disk(md))) != minor) ||
if ((md == MINOR_ALLOCED || test_bit(DMF_FREEING, &md->flags) || dm_deleting_md(md)) {
(MINOR(disk_devt(dm_disk(md))) != minor) || md = NULL;
dm_deleting_md(md) || goto out;
test_bit(DMF_FREEING, &md->flags))) {
md = NULL;
goto out;
}
dm_get(md);
} }
dm_get(md);
out: out:
spin_unlock(&_minor_lock); spin_unlock(&_minor_lock);
...@@ -2698,11 +2695,15 @@ struct mapped_device *dm_get_from_kobject(struct kobject *kobj) ...@@ -2698,11 +2695,15 @@ struct mapped_device *dm_get_from_kobject(struct kobject *kobj)
md = container_of(kobj, struct mapped_device, kobj_holder.kobj); md = container_of(kobj, struct mapped_device, kobj_holder.kobj);
if (test_bit(DMF_FREEING, &md->flags) || spin_lock(&_minor_lock);
dm_deleting_md(md)) if (test_bit(DMF_FREEING, &md->flags) || dm_deleting_md(md)) {
return NULL; md = NULL;
goto out;
}
dm_get(md); dm_get(md);
out:
spin_unlock(&_minor_lock);
return md; return md;
} }
......
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
#include <linux/hdreg.h> #include <linux/hdreg.h>
#include <linux/completion.h> #include <linux/completion.h>
#include <linux/kobject.h> #include <linux/kobject.h>
#include <linux/refcount.h>
#include "dm-stats.h" #include "dm-stats.h"
...@@ -38,7 +39,7 @@ ...@@ -38,7 +39,7 @@
*/ */
struct dm_dev_internal { struct dm_dev_internal {
struct list_head list; struct list_head list;
atomic_t count; refcount_t count;
struct dm_dev *dm_dev; struct dm_dev *dm_dev;
}; };
......
...@@ -11,6 +11,7 @@ ...@@ -11,6 +11,7 @@
#include <linux/list.h> #include <linux/list.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/device-mapper.h> #include <linux/device-mapper.h>
#include <linux/kernel.h>
#define DM_MSG_PREFIX "space map metadata" #define DM_MSG_PREFIX "space map metadata"
...@@ -111,7 +112,7 @@ static bool brb_empty(struct bop_ring_buffer *brb) ...@@ -111,7 +112,7 @@ static bool brb_empty(struct bop_ring_buffer *brb)
static unsigned brb_next(struct bop_ring_buffer *brb, unsigned old) static unsigned brb_next(struct bop_ring_buffer *brb, unsigned old)
{ {
unsigned r = old + 1; unsigned r = old + 1;
return (r >= (sizeof(brb->bops) / sizeof(*brb->bops))) ? 0 : r; return r >= ARRAY_SIZE(brb->bops) ? 0 : r;
} }
static int brb_push(struct bop_ring_buffer *brb, static int brb_push(struct bop_ring_buffer *brb,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment