Commit d1260e2a authored by Joe Thornber's avatar Joe Thornber Committed by Mike Snitzer

dm cache: fix race condition in the writeback mode overwrite_bio optimisation

When a DM cache in writeback mode moves data between the slow and fast
device it can often avoid a copy if the triggering bio either:

i) covers the whole block (no point copying if we're about to overwrite it)
ii) the migration is a promotion and the origin block is currently discarded

Prior to this fix there was a race with case (ii).  The discard status
was checked with a shared lock held (rather than exclusive).  This meant
another bio could run in parallel and write data to the origin, removing
the discard state.  After the promotion the parallel write would have
been lost.

With this fix the discard status is re-checked once the exclusive lock
has been aquired.  If the block is no longer discarded it falls back to
the slower full copy path.

Fixes: b29d4986 ("dm cache: significant rework to leverage dm-bio-prison-v2")
Cc: stable@vger.kernel.org # v4.12+
Signed-off-by: default avatarJoe Thornber <ejt@redhat.com>
Signed-off-by: default avatarMike Snitzer <snitzer@redhat.com>
parent 6bdd0796
......@@ -1201,6 +1201,18 @@ static void background_work_end(struct cache *cache)
/*----------------------------------------------------------------*/
static bool bio_writes_complete_block(struct cache *cache, struct bio *bio)
{
return (bio_data_dir(bio) == WRITE) &&
(bio->bi_iter.bi_size == (cache->sectors_per_block << SECTOR_SHIFT));
}
static bool optimisable_bio(struct cache *cache, struct bio *bio, dm_oblock_t block)
{
return writeback_mode(&cache->features) &&
(is_discarded_oblock(cache, block) || bio_writes_complete_block(cache, bio));
}
static void quiesce(struct dm_cache_migration *mg,
void (*continuation)(struct work_struct *))
{
......@@ -1474,22 +1486,9 @@ static void mg_upgrade_lock(struct work_struct *ws)
}
}
static void mg_copy(struct work_struct *ws)
static void mg_full_copy(struct work_struct *ws)
{
int r;
struct dm_cache_migration *mg = ws_to_mg(ws);
if (mg->overwrite_bio) {
/*
* It's safe to do this here, even though it's new data
* because all IO has been locked out of the block.
*
* mg_lock_writes() already took READ_WRITE_LOCK_LEVEL
* so _not_ using mg_upgrade_lock() as continutation.
*/
overwrite(mg, mg_update_metadata_after_copy);
} else {
struct cache *cache = mg->cache;
struct policy_work *op = mg->op;
bool is_policy_promote = (op->op == POLICY_PROMOTE);
......@@ -1502,13 +1501,46 @@ static void mg_copy(struct work_struct *ws)
init_continuation(&mg->k, mg_upgrade_lock);
r = copy(mg, is_policy_promote);
if (r) {
if (copy(mg, is_policy_promote)) {
DMERR_LIMIT("%s: migration copy failed", cache_device_name(cache));
mg->k.input = BLK_STS_IOERR;
mg_complete(mg, false);
}
}
static void mg_copy(struct work_struct *ws)
{
struct dm_cache_migration *mg = ws_to_mg(ws);
if (mg->overwrite_bio) {
/*
* No exclusive lock was held when we last checked if the bio
* was optimisable. So we have to check again in case things
* have changed (eg, the block may no longer be discarded).
*/
if (!optimisable_bio(mg->cache, mg->overwrite_bio, mg->op->oblock)) {
/*
* Fallback to a real full copy after doing some tidying up.
*/
bool rb = bio_detain_shared(mg->cache, mg->op->oblock, mg->overwrite_bio);
BUG_ON(rb); /* An exclussive lock must _not_ be held for this block */
mg->overwrite_bio = NULL;
inc_io_migrations(mg->cache);
mg_full_copy(ws);
return;
}
/*
* It's safe to do this here, even though it's new data
* because all IO has been locked out of the block.
*
* mg_lock_writes() already took READ_WRITE_LOCK_LEVEL
* so _not_ using mg_upgrade_lock() as continutation.
*/
overwrite(mg, mg_update_metadata_after_copy);
} else
mg_full_copy(ws);
}
static int mg_lock_writes(struct dm_cache_migration *mg)
......@@ -1748,18 +1780,6 @@ static void inc_miss_counter(struct cache *cache, struct bio *bio)
/*----------------------------------------------------------------*/
static bool bio_writes_complete_block(struct cache *cache, struct bio *bio)
{
return (bio_data_dir(bio) == WRITE) &&
(bio->bi_iter.bi_size == (cache->sectors_per_block << SECTOR_SHIFT));
}
static bool optimisable_bio(struct cache *cache, struct bio *bio, dm_oblock_t block)
{
return writeback_mode(&cache->features) &&
(is_discarded_oblock(cache, block) || bio_writes_complete_block(cache, bio));
}
static int map_bio(struct cache *cache, struct bio *bio, dm_oblock_t block,
bool *commit_needed)
{
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment