Commit dac94e29 authored by Linus Torvalds

Merge tag 'for-4.12/dm-fixes-2' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm

Pull device mapper fixes from Mike Snitzer:

 - a couple DM thin provisioning fixes

 - a few request-based DM and DM multipath fixes for issues that were
   introduced when merging Christoph's changes with Bart's changes for 4.12

 - a DM bufio unsigned overflow fix

 - a couple pure fixes for the DM cache target.

 - various very small tweaks to the DM cache target that enable
   considerable speed improvements in the face of continuous IO. Given
   that the cache target was significantly reworked for 4.12, I see no
   reason to sit on these advances until 4.13 considering the favorable
   results associated with such minimalist tweaks.

* tag 'for-4.12/dm-fixes-2' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm:
  dm cache: handle kmalloc failure allocating background_tracker struct
  dm bufio: make the parameter "retain_bytes" unsigned long
  dm mpath: multipath_clone_and_map must not return -EIO
  dm mpath: don't return -EIO from dm_report_EIO
  dm rq: add a missing break to map_request
  dm space map disk: fix some book keeping in the disk space map
  dm thin metadata: call precommit before saving the roots
  dm cache policy smq: don't do any writebacks unless IDLE
  dm cache: simplify the IDLE vs BUSY state calculation
  dm cache: track all IO to the cache rather than just the origin device's IO
  dm cache policy smq: stop preemptively demoting blocks
  dm cache policy smq: put newly promoted entries at the top of the multiqueue
  dm cache policy smq: be more aggressive about triggering a writeback
  dm cache policy smq: only demote entries in bottom half of the clean multiqueue
  dm cache: fix incorrect 'idle_time' reset in IO tracker
parents 243bfd2c 7e1b9521
@@ -218,7 +218,7 @@ static DEFINE_SPINLOCK(param_spinlock);
  * Buffers are freed after this timeout
  */
 static unsigned dm_bufio_max_age = DM_BUFIO_DEFAULT_AGE_SECS;
-static unsigned dm_bufio_retain_bytes = DM_BUFIO_DEFAULT_RETAIN_BYTES;
+static unsigned long dm_bufio_retain_bytes = DM_BUFIO_DEFAULT_RETAIN_BYTES;
 
 static unsigned long dm_bufio_peak_allocated;
 static unsigned long dm_bufio_allocated_kmem_cache;
@@ -1558,10 +1558,10 @@ static bool __try_evict_buffer(struct dm_buffer *b, gfp_t gfp)
 	return true;
 }
 
-static unsigned get_retain_buffers(struct dm_bufio_client *c)
+static unsigned long get_retain_buffers(struct dm_bufio_client *c)
 {
-	unsigned retain_bytes = ACCESS_ONCE(dm_bufio_retain_bytes);
-	return retain_bytes / c->block_size;
+	unsigned long retain_bytes = ACCESS_ONCE(dm_bufio_retain_bytes);
+	return retain_bytes >> (c->sectors_per_block_bits + SECTOR_SHIFT);
 }
 
 static unsigned long __scan(struct dm_bufio_client *c, unsigned long nr_to_scan,
@@ -1571,7 +1571,7 @@ static unsigned long __scan(struct dm_bufio_client *c, unsigned long nr_to_scan,
 	struct dm_buffer *b, *tmp;
 	unsigned long freed = 0;
 	unsigned long count = nr_to_scan;
-	unsigned retain_target = get_retain_buffers(c);
+	unsigned long retain_target = get_retain_buffers(c);
 
 	for (l = 0; l < LIST_SIZE; l++) {
 		list_for_each_entry_safe_reverse(b, tmp, &c->lru[l], lru_list) {
@@ -1794,8 +1794,8 @@ static bool older_than(struct dm_buffer *b, unsigned long age_hz)
 static void __evict_old_buffers(struct dm_bufio_client *c, unsigned long age_hz)
 {
 	struct dm_buffer *b, *tmp;
-	unsigned retain_target = get_retain_buffers(c);
-	unsigned count;
+	unsigned long retain_target = get_retain_buffers(c);
+	unsigned long count;
 	LIST_HEAD(write_list);
 
 	dm_bufio_lock(c);
@@ -1955,7 +1955,7 @@ MODULE_PARM_DESC(max_cache_size_bytes, "Size of metadata cache");
 module_param_named(max_age_seconds, dm_bufio_max_age, uint, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(max_age_seconds, "Max age of a buffer in seconds");
 
-module_param_named(retain_bytes, dm_bufio_retain_bytes, uint, S_IRUGO | S_IWUSR);
+module_param_named(retain_bytes, dm_bufio_retain_bytes, ulong, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(retain_bytes, "Try to keep at least this many bytes cached in memory");
 
 module_param_named(peak_allocated_bytes, dm_bufio_peak_allocated, ulong, S_IRUGO | S_IWUSR);
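The retain_bytes fix above is easiest to see with a stand-alone sketch (user-space C, not the kernel code; the 6 GiB request and the 4 KiB block size are made-up values, and a 64-bit unsigned long is assumed): a 32-bit unsigned parameter silently truncates large byte counts, and shifting by sectors_per_block_bits + SECTOR_SHIFT is equivalent to dividing by a power-of-two block size.

/* build with: cc -o retain_demo retain_demo.c */
#include <stdio.h>

#define SECTOR_SHIFT 9	/* 512-byte sectors, as in the kernel */

int main(void)
{
	unsigned long requested = 6UL * 1024 * 1024 * 1024;	/* 6 GiB, illustrative */
	unsigned truncated = (unsigned)requested;		/* what a 32-bit parameter would hold */

	unsigned sectors_per_block_bits = 3;			/* hypothetical 8-sector (4 KiB) blocks */
	unsigned long block_size = 1UL << (sectors_per_block_bits + SECTOR_SHIFT);

	printf("requested retain_bytes: %lu\n", requested);
	printf("stored in 32 bits:      %u (silently truncated)\n", truncated);
	printf("buffers to retain:      %lu (shift) vs %lu (divide)\n",
	       requested >> (sectors_per_block_bits + SECTOR_SHIFT),
	       requested / block_size);
	return 0;
}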
@@ -33,6 +33,11 @@ struct background_tracker *btracker_create(unsigned max_work)
 {
 	struct background_tracker *b = kmalloc(sizeof(*b), GFP_KERNEL);
 
+	if (!b) {
+		DMERR("couldn't create background_tracker");
+		return NULL;
+	}
+
 	b->max_work = max_work;
 	atomic_set(&b->pending_promotes, 0);
 	atomic_set(&b->pending_writebacks, 0);
@@ -1120,8 +1120,6 @@ static bool clean_target_met(struct smq_policy *mq, bool idle)
 	 * Cache entries may not be populated. So we cannot rely on the
 	 * size of the clean queue.
 	 */
-	unsigned nr_clean;
-
 	if (idle) {
 		/*
 		 * We'd like to clean everything.
@@ -1129,18 +1127,16 @@ static bool clean_target_met(struct smq_policy *mq, bool idle)
 		return q_size(&mq->dirty) == 0u;
 	}
 
-	nr_clean = from_cblock(mq->cache_size) - q_size(&mq->dirty);
-	return (nr_clean + btracker_nr_writebacks_queued(mq->bg_work)) >=
-		percent_to_target(mq, CLEAN_TARGET);
+	/*
+	 * If we're busy we don't worry about cleaning at all.
+	 */
+	return true;
 }
 
-static bool free_target_met(struct smq_policy *mq, bool idle)
+static bool free_target_met(struct smq_policy *mq)
 {
 	unsigned nr_free;
 
-	if (!idle)
-		return true;
-
 	nr_free = from_cblock(mq->cache_size) - mq->cache_alloc.nr_allocated;
 	return (nr_free + btracker_nr_demotions_queued(mq->bg_work)) >=
 		percent_to_target(mq, FREE_TARGET);
@@ -1190,9 +1186,9 @@ static void queue_demotion(struct smq_policy *mq)
 	if (unlikely(WARN_ON_ONCE(!mq->migrations_allowed)))
 		return;
 
-	e = q_peek(&mq->clean, mq->clean.nr_levels, true);
+	e = q_peek(&mq->clean, mq->clean.nr_levels / 2, true);
 	if (!e) {
-		if (!clean_target_met(mq, false))
+		if (!clean_target_met(mq, true))
 			queue_writeback(mq);
 		return;
 	}
@@ -1220,7 +1216,7 @@ static void queue_promotion(struct smq_policy *mq, dm_oblock_t oblock,
 		 * We always claim to be 'idle' to ensure some demotions happen
 		 * with continuous loads.
 		 */
-		if (!free_target_met(mq, true))
+		if (!free_target_met(mq))
 			queue_demotion(mq);
 		return;
 	}
@@ -1421,15 +1417,11 @@ static int smq_get_background_work(struct dm_cache_policy *p, bool idle,
 	spin_lock_irqsave(&mq->lock, flags);
 	r = btracker_issue(mq->bg_work, result);
 	if (r == -ENODATA) {
-		/* find some writeback work to do */
-		if (mq->migrations_allowed && !free_target_met(mq, idle))
-			queue_demotion(mq);
-
-		else if (!clean_target_met(mq, idle))
+		if (!clean_target_met(mq, idle)) {
 			queue_writeback(mq);
+			r = btracker_issue(mq->bg_work, result);
+		}
 	}
 	spin_unlock_irqrestore(&mq->lock, flags);
 
 	return r;
@@ -1452,6 +1444,7 @@ static void __complete_background_work(struct smq_policy *mq,
 		clear_pending(mq, e);
 		if (success) {
 			e->oblock = work->oblock;
+			e->level = NR_CACHE_LEVELS - 1;
 			push(mq, e);
 			// h, q, a
 		} else {
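For orientation, the test that survives in free_target_met() has the shape "(nr_free + queued demotions) >= some percentage of the cache size". A user-space sketch of that arithmetic (the 2% target and the block counts are invented, and percent_to_target() is approximated by a plain percentage, so this is illustrative only):

#include <stdbool.h>
#include <stdio.h>

/* Mirrors the shape of:
 *   (nr_free + queued_demotions) >= percent_to_target(mq, FREE_TARGET)
 */
static bool free_target_met(unsigned cache_size, unsigned nr_allocated,
			    unsigned queued_demotions, unsigned free_target_percent)
{
	unsigned nr_free = cache_size - nr_allocated;
	unsigned target = cache_size * free_target_percent / 100;

	return (nr_free + queued_demotions) >= target;
}

int main(void)
{
	/* 10000-block cache, 9900 allocated, hypothetical 2% free target */
	printf("met: %d\n", free_target_met(10000, 9900, 50, 2));	/* 100 + 50 >= 200 -> 0 */
	printf("met: %d\n", free_target_met(10000, 9900, 150, 2));	/* 100 + 150 >= 200 -> 1 */
	return 0;
}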
@@ -94,6 +94,9 @@ static void iot_io_begin(struct io_tracker *iot, sector_t len)
 static void __iot_io_end(struct io_tracker *iot, sector_t len)
 {
+	if (!len)
+		return;
+
 	iot->in_flight -= len;
 	if (!iot->in_flight)
 		iot->idle_time = jiffies;
@@ -474,7 +477,7 @@ struct cache {
 	spinlock_t invalidation_lock;
 	struct list_head invalidation_requests;
 
-	struct io_tracker origin_tracker;
+	struct io_tracker tracker;
 
 	struct work_struct commit_ws;
 	struct batcher committer;
@@ -901,8 +904,7 @@ static dm_oblock_t get_bio_block(struct cache *cache, struct bio *bio)
 static bool accountable_bio(struct cache *cache, struct bio *bio)
 {
-	return ((bio->bi_bdev == cache->origin_dev->bdev) &&
-		bio_op(bio) != REQ_OP_DISCARD);
+	return bio_op(bio) != REQ_OP_DISCARD;
 }
 
 static void accounted_begin(struct cache *cache, struct bio *bio)
@@ -912,7 +914,7 @@ static void accounted_begin(struct cache *cache, struct bio *bio)
 	if (accountable_bio(cache, bio)) {
 		pb->len = bio_sectors(bio);
-		iot_io_begin(&cache->origin_tracker, pb->len);
+		iot_io_begin(&cache->tracker, pb->len);
 	}
 }
@@ -921,7 +923,7 @@ static void accounted_complete(struct cache *cache, struct bio *bio)
 	size_t pb_data_size = get_per_bio_data_size(cache);
 	struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
 
-	iot_io_end(&cache->origin_tracker, pb->len);
+	iot_io_end(&cache->tracker, pb->len);
 }
 
 static void accounted_request(struct cache *cache, struct bio *bio)
@@ -1716,20 +1718,19 @@ static int invalidate_start(struct cache *cache, dm_cblock_t cblock,
 enum busy {
 	IDLE,
-	MODERATE,
 	BUSY
 };
 
 static enum busy spare_migration_bandwidth(struct cache *cache)
 {
-	bool idle = iot_idle_for(&cache->origin_tracker, HZ);
+	bool idle = iot_idle_for(&cache->tracker, HZ);
 	sector_t current_volume = (atomic_read(&cache->nr_io_migrations) + 1) *
 		cache->sectors_per_block;
 
-	if (current_volume <= cache->migration_threshold)
-		return idle ? IDLE : MODERATE;
+	if (idle && current_volume <= cache->migration_threshold)
+		return IDLE;
 	else
-		return idle ? MODERATE : BUSY;
+		return BUSY;
 }
 
 static void inc_hit_counter(struct cache *cache, struct bio *bio)
@@ -2045,8 +2046,6 @@ static void check_migrations(struct work_struct *ws)
 	for (;;) {
 		b = spare_migration_bandwidth(cache);
-		if (b == BUSY)
-			break;
 
 		r = policy_get_background_work(cache->policy, b == IDLE, &op);
 		if (r == -ENODATA)
@@ -2717,7 +2716,7 @@ static int cache_create(struct cache_args *ca, struct cache **result)
 	batcher_init(&cache->committer, commit_op, cache,
 		     issue_op, cache, cache->wq);
 
-	iot_init(&cache->origin_tracker);
+	iot_init(&cache->tracker);
 
 	init_rwsem(&cache->background_work_lock);
 	prevent_background_work(cache);
@@ -2941,7 +2940,7 @@ static void cache_postsuspend(struct dm_target *ti)
 	cancel_delayed_work(&cache->waker);
 	flush_workqueue(cache->wq);
 
-	WARN_ON(cache->origin_tracker.in_flight);
+	WARN_ON(cache->tracker.in_flight);
 
 	/*
 	 * If it's a flush suspend there won't be any deferred bios, so this
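A user-space model of the io_tracker change may help: the names mirror the diff, but jiffies is replaced by a plain "now" argument and nothing here is the actual driver code. It shows why __iot_io_end() must ignore zero-length completions: bios that were never accounted complete with len == 0, and without the early return they keep resetting idle_time, so iot_idle_for() never reports the cache as idle.

#include <stdbool.h>
#include <stdio.h>

struct io_tracker {
	unsigned long in_flight;
	unsigned long idle_time;
};

static void iot_io_end(struct io_tracker *iot, unsigned long len, unsigned long now)
{
	if (!len)		/* the fix: ignore bios that were never accounted */
		return;

	iot->in_flight -= len;
	if (!iot->in_flight)
		iot->idle_time = now;
}

static bool iot_idle_for(struct io_tracker *iot, unsigned long now, unsigned long period)
{
	return !iot->in_flight && (now - iot->idle_time) >= period;
}

int main(void)
{
	struct io_tracker iot = { .in_flight = 0, .idle_time = 0 };

	/* zero-length completions at times 10, 20, 30 are ignored */
	iot_io_end(&iot, 0, 10);
	iot_io_end(&iot, 0, 20);
	iot_io_end(&iot, 0, 30);

	printf("idle after 40 ticks: %d\n", iot_idle_for(&iot, 40, 25));	/* 1 with the guard */
	return 0;
}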
@@ -447,7 +447,7 @@ static struct pgpath *choose_pgpath(struct multipath *m, size_t nr_bytes)
  * it has been invoked.
  */
 #define dm_report_EIO(m)						\
-({									\
+do {									\
 	struct mapped_device *md = dm_table_get_md((m)->ti->table);	\
 									\
 	pr_debug("%s: returning EIO; QIFNP = %d; SQIFNP = %d; DNFS = %d\n", \
@@ -455,8 +455,7 @@ static struct pgpath *choose_pgpath(struct multipath *m, size_t nr_bytes)
 		 test_bit(MPATHF_QUEUE_IF_NO_PATH, &(m)->flags),	\
 		 test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &(m)->flags),	\
 		 dm_noflush_suspending((m)->ti));			\
-	-EIO;								\
-})
+} while (0)
 
 /*
  * Map cloned requests (request-based multipath)
@@ -481,7 +480,8 @@ static int multipath_clone_and_map(struct dm_target *ti, struct request *rq,
 	if (!pgpath) {
 		if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))
 			return DM_MAPIO_DELAY_REQUEUE;
-		return dm_report_EIO(m);	/* Failed */
+		dm_report_EIO(m);	/* Failed */
+		return DM_MAPIO_KILL;
 	} else if (test_bit(MPATHF_QUEUE_IO, &m->flags) ||
 		   test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags)) {
 		if (pg_init_all_paths(m))
@@ -558,7 +558,8 @@ static int __multipath_map_bio(struct multipath *m, struct bio *bio, struct dm_m
 	if (!pgpath) {
 		if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))
 			return DM_MAPIO_REQUEUE;
-		return dm_report_EIO(m);
+		dm_report_EIO(m);
+		return -EIO;
 	}
 
 	mpio->pgpath = pgpath;
@@ -1493,7 +1494,7 @@ static int multipath_end_io(struct dm_target *ti, struct request *clone,
 	if (atomic_read(&m->nr_valid_paths) == 0 &&
 	    !test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) {
 		if (error == -EIO)
-			error = dm_report_EIO(m);
+			dm_report_EIO(m);
 		/* complete with the original error */
 		r = DM_ENDIO_DONE;
 	}
@@ -1524,8 +1525,10 @@ static int do_end_io_bio(struct multipath *m, struct bio *clone,
 		fail_path(mpio->pgpath);
 
 	if (atomic_read(&m->nr_valid_paths) == 0 &&
-	    !test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))
-		return dm_report_EIO(m);
+	    !test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) {
+		dm_report_EIO(m);
+		return -EIO;
+	}
 
 	/* Queue for the daemon to resubmit */
 	dm_bio_restore(get_bio_details_from_bio(clone), clone);
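The dm_report_EIO() change is a macro-shape change: the old form was a GCC/clang statement expression whose last expression (-EIO) was its value, so callers wrote "return dm_report_EIO(m);"; the new do { } while (0) form only logs, and each call site now returns the status appropriate to its context (DM_MAPIO_KILL for the request-based map path, -EIO for the bio paths). A stand-alone sketch of the two shapes (the message and helper names are placeholders, and the statement expression requires GCC or clang):

#include <errno.h>
#include <stdio.h>

/* Old shape: a statement expression whose last expression is its value. */
#define report_and_return_eio(msg)	\
({					\
	fprintf(stderr, "%s\n", msg);	\
	-EIO;				\
})

/* New shape: a plain do/while(0) statement; it has no value, so each
 * caller returns whatever status fits its context.
 */
#define report_only(msg)		\
do {					\
	fprintf(stderr, "%s\n", msg);	\
} while (0)

static int old_style(void)
{
	return report_and_return_eio("no usable path");	/* always -EIO */
}

static int new_style(void)
{
	report_only("no usable path");
	return 1;	/* caller-chosen status, e.g. DM_MAPIO_KILL in the driver */
}

int main(void)
{
	printf("old=%d new=%d\n", old_style(), new_style());
	return 0;
}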
@@ -507,6 +507,7 @@ static int map_request(struct dm_rq_target_io *tio)
 	case DM_MAPIO_KILL:
 		/* The target wants to complete the I/O */
 		dm_kill_unmapped_request(rq, -EIO);
+		break;
 	default:
 		DMWARN("unimplemented target map return value: %d", r);
 		BUG();
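The dm-rq fix is the classic missing-break fallthrough: without the break, the DM_MAPIO_KILL case falls into default and hits BUG(). A minimal illustration (stand-in enum values, not the dm-rq code):

#include <stdio.h>

/* Stand-in labels; the real values live in the device-mapper headers. */
enum map_result { MAP_SUBMITTED, MAP_REQUEUE, MAP_KILL };

static void handle(enum map_result r)
{
	switch (r) {
	case MAP_KILL:
		printf("killing request\n");
		break;		/* without this break we would fall through ... */
	default:
		printf("unimplemented return value: %d\n", r);	/* ... and hit this */
	}
}

int main(void)
{
	handle(MAP_KILL);	/* prints only "killing request" with the break in place */
	return 0;
}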
@@ -484,11 +484,11 @@ static int __write_initial_superblock(struct dm_pool_metadata *pmd)
 	if (r < 0)
 		return r;
 
-	r = save_sm_roots(pmd);
+	r = dm_tm_pre_commit(pmd->tm);
 	if (r < 0)
 		return r;
 
-	r = dm_tm_pre_commit(pmd->tm);
+	r = save_sm_roots(pmd);
 	if (r < 0)
 		return r;
@@ -142,10 +142,23 @@ static int sm_disk_inc_block(struct dm_space_map *sm, dm_block_t b)
 static int sm_disk_dec_block(struct dm_space_map *sm, dm_block_t b)
 {
+	int r;
+	uint32_t old_count;
 	enum allocation_event ev;
 	struct sm_disk *smd = container_of(sm, struct sm_disk, sm);
 
-	return sm_ll_dec(&smd->ll, b, &ev);
+	r = sm_ll_dec(&smd->ll, b, &ev);
+	if (!r && (ev == SM_FREE)) {
+		/*
+		 * It's only free if it's also free in the last
+		 * transaction.
+		 */
+		r = sm_ll_lookup(&smd->old_ll, b, &old_count);
+		if (!r && !old_count)
+			smd->nr_allocated_this_transaction--;
+	}
+
+	return r;
 }
 
 static int sm_disk_new_block(struct dm_space_map *sm, dm_block_t *b)
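The space map bookkeeping rule can be restated with a toy model (illustrative only; the arrays below stand in for the ll/old_ll reference-count structures): nr_allocated_this_transaction should only drop when a block's count reaches zero now and was already zero at the last commit, i.e. the block was both allocated and freed within the current transaction.

#include <stdio.h>

#define NR_BLOCKS 4

static unsigned counts[NR_BLOCKS]     = { 1, 1, 0, 0 };	/* current refcounts (hypothetical) */
static unsigned old_counts[NR_BLOCKS] = { 0, 1, 0, 0 };	/* refcounts at the last commit */
static int nr_allocated_this_transaction = 1;		/* block 0 was allocated in this transaction */

static void dec_block(unsigned b)
{
	counts[b]--;
	if (counts[b] == 0 && old_counts[b] == 0)	/* free now and free at the last commit */
		nr_allocated_this_transaction--;
}

int main(void)
{
	dec_block(1);	/* allocated before this transaction: bookkeeping unchanged */
	printf("after freeing block 1: %d\n", nr_allocated_this_transaction);	/* 1 */

	dec_block(0);	/* allocated during this transaction: bookkeeping drops */
	printf("after freeing block 0: %d\n", nr_allocated_this_transaction);	/* 0 */
	return 0;
}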