Commit f506566c authored by Joe Thornber, committed by Kamal Mostafa

dm cache: fix race when issuing a POLICY_REPLACE operation

commit fb4100ae upstream.

There is a race between a policy deciding to replace a cache entry,
the core target writing back any dirty data from this block, and other
IO threads doing IO to the same block.

This sort of problem is avoided most of the time by the core target
grabbing a bio prison cell before making the request to the policy.
But for a demotion the core target doesn't know which block will be
demoted, so it can't grab the cell in advance.

Fix this demotion race by introducing a callback to the policy interface
that allows the policy to grab the cell on behalf of the core target.
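
For illustration only (not part of the patch): a minimal, self-contained
userspace C sketch of the callback protocol this commit introduces. The
policy asks the core target to lock the victim block before committing to
the demotion, and backs off with -EBUSY if the lock cannot be taken, so the
caller can fall back to treating the access as a miss. demote_victim() and
fake_cell_locker() are made-up stand-ins for demote_cblock() and
cell_locker() in the diff below, and dm_oblock_t is a plain integer here.

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

typedef uint64_t dm_oblock_t;           /* stand-in for the kernel's block type */

struct policy_locker;
typedef int (*policy_lock_fn)(struct policy_locker *l, dm_oblock_t oblock);

struct policy_locker {
        policy_lock_fn fn;
};

/* Policy side: pick a victim, then ask the core target to lock it first. */
static int demote_victim(struct policy_locker *locker, dm_oblock_t victim,
                         dm_oblock_t *old_oblock)
{
        if (locker->fn(locker, victim))
                return -EBUSY;          /* cell already held; caller falls back to a miss */

        *old_oblock = victim;           /* safe to hand back for demotion */
        return 0;
}

/* Core-target side: a toy locker that treats block 42 as already busy. */
static int fake_cell_locker(struct policy_locker *l, dm_oblock_t b)
{
        (void)l;
        return b == 42 ? -EBUSY : 0;
}

int main(void)
{
        struct policy_locker locker = { .fn = fake_cell_locker };
        dm_oblock_t old;

        printf("demote 7:  %d\n", demote_victim(&locker, 7, &old));   /* prints 0 */
        printf("demote 42: %d\n", demote_victim(&locker, 42, &old));  /* prints -EBUSY */
        return 0;
}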
Signed-off-by: Joe Thornber <ejt@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Kamal Mostafa <kamal@canonical.com>
parent dd53e388

drivers/md/dm-cache-policy-cleaner.c
@@ -171,7 +171,8 @@ static void remove_cache_hash_entry(struct wb_cache_entry *e)
 /* Public interface (see dm-cache-policy.h */
 static int wb_map(struct dm_cache_policy *pe, dm_oblock_t oblock,
                   bool can_block, bool can_migrate, bool discarded_oblock,
-                  struct bio *bio, struct policy_result *result)
+                  struct bio *bio, struct policy_locker *locker,
+                  struct policy_result *result)
 {
         struct policy *p = to_policy(pe);
         struct wb_cache_entry *e;

drivers/md/dm-cache-policy-internal.h
@@ -16,9 +16,10 @@
  */
 static inline int policy_map(struct dm_cache_policy *p, dm_oblock_t oblock,
                              bool can_block, bool can_migrate, bool discarded_oblock,
-                             struct bio *bio, struct policy_result *result)
+                             struct bio *bio, struct policy_locker *locker,
+                             struct policy_result *result)
 {
-        return p->map(p, oblock, can_block, can_migrate, discarded_oblock, bio, result);
+        return p->map(p, oblock, can_block, can_migrate, discarded_oblock, bio, locker, result);
 }
 
 static inline int policy_lookup(struct dm_cache_policy *p, dm_oblock_t oblock, dm_cblock_t *cblock)

drivers/md/dm-cache-policy-mq.c
@@ -616,9 +616,10 @@ static void requeue_and_update_tick(struct mq_policy *mq, struct entry *e)
  * - set the hit count to a hard coded value other than 1, eg, is it better
  *   if it goes in at level 2?
  */
-static int demote_cblock(struct mq_policy *mq, dm_oblock_t *oblock)
+static int demote_cblock(struct mq_policy *mq,
+                         struct policy_locker *locker, dm_oblock_t *oblock)
 {
-        struct entry *demoted = pop(mq, &mq->cache_clean);
+        struct entry *demoted = peek(&mq->cache_clean);
 
         if (!demoted)
                 /*
@@ -630,6 +631,13 @@ static int demote_cblock(struct mq_policy *mq, dm_oblock_t *oblock)
                  */
                 return -ENOSPC;
 
+        if (locker->fn(locker, demoted->oblock))
+                /*
+                 * We couldn't lock the demoted block.
+                 */
+                return -EBUSY;
+
+        del(mq, demoted);
         *oblock = demoted->oblock;
         free_entry(&mq->cache_pool, demoted);
 
@@ -718,6 +726,7 @@ static int cache_entry_found(struct mq_policy *mq,
  * finding which cache block to use.
  */
 static int pre_cache_to_cache(struct mq_policy *mq, struct entry *e,
+                              struct policy_locker *locker,
                               struct policy_result *result)
 {
         int r;
@@ -726,11 +735,12 @@ static int pre_cache_to_cache(struct mq_policy *mq, struct entry *e,
         /* Ensure there's a free cblock in the cache */
         if (epool_empty(&mq->cache_pool)) {
                 result->op = POLICY_REPLACE;
-                r = demote_cblock(mq, &result->old_oblock);
+                r = demote_cblock(mq, locker, &result->old_oblock);
                 if (r) {
                         result->op = POLICY_MISS;
                         return 0;
                 }
+
         } else
                 result->op = POLICY_NEW;
 
@@ -754,7 +764,8 @@ static int pre_cache_to_cache(struct mq_policy *mq, struct entry *e,
 
 static int pre_cache_entry_found(struct mq_policy *mq, struct entry *e,
                                  bool can_migrate, bool discarded_oblock,
-                                 int data_dir, struct policy_result *result)
+                                 int data_dir, struct policy_locker *locker,
+                                 struct policy_result *result)
 {
         int r = 0;
         bool updated = updated_this_tick(mq, e);
@@ -769,7 +780,7 @@ static int pre_cache_entry_found(struct mq_policy *mq, struct entry *e,
 
         else {
                 requeue_and_update_tick(mq, e);
-                r = pre_cache_to_cache(mq, e, result);
+                r = pre_cache_to_cache(mq, e, locker, result);
         }
 
         return r;
@@ -800,6 +811,7 @@ static void insert_in_pre_cache(struct mq_policy *mq,
 }
 
 static void insert_in_cache(struct mq_policy *mq, dm_oblock_t oblock,
+                            struct policy_locker *locker,
                             struct policy_result *result)
 {
         int r;
@@ -807,7 +819,7 @@ static void insert_in_cache(struct mq_policy *mq, dm_oblock_t oblock,
 
         if (epool_empty(&mq->cache_pool)) {
                 result->op = POLICY_REPLACE;
-                r = demote_cblock(mq, &result->old_oblock);
+                r = demote_cblock(mq, locker, &result->old_oblock);
                 if (unlikely(r)) {
                         result->op = POLICY_MISS;
                         insert_in_pre_cache(mq, oblock);
@@ -836,11 +848,12 @@ static void insert_in_cache(struct mq_policy *mq, dm_oblock_t oblock,
 
 static int no_entry_found(struct mq_policy *mq, dm_oblock_t oblock,
                           bool can_migrate, bool discarded_oblock,
-                          int data_dir, struct policy_result *result)
+                          int data_dir, struct policy_locker *locker,
+                          struct policy_result *result)
 {
         if (adjusted_promote_threshold(mq, discarded_oblock, data_dir) <= 1) {
                 if (can_migrate)
-                        insert_in_cache(mq, oblock, result);
+                        insert_in_cache(mq, oblock, locker, result);
                 else
                         return -EWOULDBLOCK;
         } else {
@@ -857,7 +870,8 @@ static int no_entry_found(struct mq_policy *mq, dm_oblock_t oblock,
  */
 static int map(struct mq_policy *mq, dm_oblock_t oblock,
                bool can_migrate, bool discarded_oblock,
-               int data_dir, struct policy_result *result)
+               int data_dir, struct policy_locker *locker,
+               struct policy_result *result)
 {
         int r = 0;
         struct entry *e = hash_lookup(mq, oblock);
@@ -871,11 +885,11 @@ static int map(struct mq_policy *mq, dm_oblock_t oblock,
 
         else if (e)
                 r = pre_cache_entry_found(mq, e, can_migrate, discarded_oblock,
-                                          data_dir, result);
+                                          data_dir, locker, result);
 
         else
                 r = no_entry_found(mq, oblock, can_migrate, discarded_oblock,
-                                   data_dir, result);
+                                   data_dir, locker, result);
 
         if (r == -EWOULDBLOCK)
                 result->op = POLICY_MISS;
@@ -916,7 +930,8 @@ static void copy_tick(struct mq_policy *mq)
 
 static int mq_map(struct dm_cache_policy *p, dm_oblock_t oblock,
                   bool can_block, bool can_migrate, bool discarded_oblock,
-                  struct bio *bio, struct policy_result *result)
+                  struct bio *bio, struct policy_locker *locker,
+                  struct policy_result *result)
 {
         int r;
         struct mq_policy *mq = to_mq_policy(p);
@@ -932,7 +947,7 @@ static int mq_map(struct dm_cache_policy *p, dm_oblock_t oblock,
 
         iot_examine_bio(&mq->tracker, bio);
         r = map(mq, oblock, can_migrate, discarded_oblock,
-                bio_data_dir(bio), result);
+                bio_data_dir(bio), locker, result);
 
         mutex_unlock(&mq->lock);

drivers/md/dm-cache-policy.h
@@ -69,6 +69,18 @@ enum policy_operation {
         POLICY_REPLACE
 };
 
+/*
+ * When issuing a POLICY_REPLACE the policy needs to make a callback to
+ * lock the block being demoted.  This doesn't need to occur during a
+ * writeback operation since the block remains in the cache.
+ */
+struct policy_locker;
+typedef int (*policy_lock_fn)(struct policy_locker *l, dm_oblock_t oblock);
+
+struct policy_locker {
+        policy_lock_fn fn;
+};
+
 /*
  * This is the instruction passed back to the core target.
  */
@@ -122,7 +134,8 @@ struct dm_cache_policy {
          */
         int (*map)(struct dm_cache_policy *p, dm_oblock_t oblock,
                    bool can_block, bool can_migrate, bool discarded_oblock,
-                   struct bio *bio, struct policy_result *result);
+                   struct bio *bio, struct policy_locker *locker,
+                   struct policy_result *result);
 
         /*
          * Sometimes we want to see if a block is in the cache, without

drivers/md/dm-cache-target.c
@@ -1444,16 +1444,43 @@ static void inc_miss_counter(struct cache *cache, struct bio *bio)
                    &cache->stats.read_miss : &cache->stats.write_miss);
 }
 
+/*----------------------------------------------------------------*/
+
+struct old_oblock_lock {
+        struct policy_locker locker;
+        struct cache *cache;
+        struct prealloc *structs;
+        struct dm_bio_prison_cell *cell;
+};
+
+static int null_locker(struct policy_locker *locker, dm_oblock_t b)
+{
+        /* This should never be called */
+        BUG();
+        return 0;
+}
+
+static int cell_locker(struct policy_locker *locker, dm_oblock_t b)
+{
+        struct old_oblock_lock *l = container_of(locker, struct old_oblock_lock, locker);
+        struct dm_bio_prison_cell *cell_prealloc = prealloc_get_cell(l->structs);
+
+        return bio_detain(l->cache, b, NULL, cell_prealloc,
+                          (cell_free_fn) prealloc_put_cell,
+                          l->structs, &l->cell);
+}
+
 static void process_bio(struct cache *cache, struct prealloc *structs,
                         struct bio *bio)
 {
         int r;
         bool release_cell = true;
         dm_oblock_t block = get_bio_block(cache, bio);
-        struct dm_bio_prison_cell *cell_prealloc, *old_ocell, *new_ocell;
+        struct dm_bio_prison_cell *cell_prealloc, *new_ocell;
         struct policy_result lookup_result;
         bool passthrough = passthrough_mode(&cache->features);
         bool discarded_block, can_migrate;
+        struct old_oblock_lock ool;
 
         /*
          * Check to see if that block is currently migrating.
@@ -1468,8 +1495,12 @@ static void process_bio(struct cache *cache, struct prealloc *structs,
         discarded_block = is_discarded_oblock(cache, block);
         can_migrate = !passthrough && (discarded_block || spare_migration_bandwidth(cache));
 
+        ool.locker.fn = cell_locker;
+        ool.cache = cache;
+        ool.structs = structs;
+        ool.cell = NULL;
         r = policy_map(cache->policy, block, true, can_migrate, discarded_block,
-                       bio, &lookup_result);
+                       bio, &ool.locker, &lookup_result);
 
         if (r == -EWOULDBLOCK)
                 /* migration has been denied */
@@ -1526,27 +1557,11 @@ static void process_bio(struct cache *cache, struct prealloc *structs,
                 break;
 
         case POLICY_REPLACE:
-                cell_prealloc = prealloc_get_cell(structs);
-                r = bio_detain(cache, lookup_result.old_oblock, bio, cell_prealloc,
-                               (cell_free_fn) prealloc_put_cell,
-                               structs, &old_ocell);
-                if (r > 0) {
-                        /*
-                         * We have to be careful to avoid lock inversion of
-                         * the cells.  So we back off, and wait for the
-                         * old_ocell to become free.
-                         */
-                        policy_force_mapping(cache->policy, block,
-                                             lookup_result.old_oblock);
-                        atomic_inc(&cache->stats.cache_cell_clash);
-                        break;
-                }
-
                 atomic_inc(&cache->stats.demotion);
                 atomic_inc(&cache->stats.promotion);
                 demote_then_promote(cache, structs, lookup_result.old_oblock,
                                     block, lookup_result.cblock,
-                                    old_ocell, new_ocell);
+                                    ool.cell, new_ocell);
                 release_cell = false;
                 break;
 
@@ -2594,6 +2609,9 @@ static int __cache_map(struct cache *cache, struct bio *bio, struct dm_bio_priso
         bool discarded_block;
         struct policy_result lookup_result;
         struct per_bio_data *pb = init_per_bio_data(bio, pb_data_size);
+        struct old_oblock_lock ool;
+
+        ool.locker.fn = null_locker;
 
         if (unlikely(from_oblock(block) >= from_oblock(cache->origin_blocks))) {
                 /*
@@ -2632,7 +2650,7 @@ static int __cache_map(struct cache *cache, struct bio *bio, struct dm_bio_priso
         discarded_block = is_discarded_oblock(cache, block);
 
         r = policy_map(cache->policy, block, false, can_migrate, discarded_block,
-                       bio, &lookup_result);
+                       bio, &ool.locker, &lookup_result);
         if (r == -EWOULDBLOCK) {
                 cell_defer(cache, *cell, true);
                 return DM_MAPIO_SUBMITTED;