Commit fb8527e5 authored by Linus Torvalds

Merge tag 'for-5.4/dm-fixes' of...

Merge tag 'for-5.4/dm-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm

Pull device mapper fixes from Mike Snitzer:

 - Fix DM snapshot deadlock that can occur due to COW throttling
   preventing locks from being released.

 - Fix DM cache's GFP_NOWAIT allocation failure error paths by switching
   to GFP_NOIO.

 - Make __hash_find() static in the DM clone target.

* tag 'for-5.4/dm-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm:
  dm cache: fix bugs when a GFP_NOWAIT allocation fails
  dm snapshot: rework COW throttling to fix deadlock
  dm snapshot: introduce account_start_copy() and account_end_copy()
  dm clone: Make __hash_find static
parents 90105ae1 13bd677a
...@@ -542,7 +542,7 @@ static void wake_migration_worker(struct cache *cache) ...@@ -542,7 +542,7 @@ static void wake_migration_worker(struct cache *cache)
static struct dm_bio_prison_cell_v2 *alloc_prison_cell(struct cache *cache) static struct dm_bio_prison_cell_v2 *alloc_prison_cell(struct cache *cache)
{ {
return dm_bio_prison_alloc_cell_v2(cache->prison, GFP_NOWAIT); return dm_bio_prison_alloc_cell_v2(cache->prison, GFP_NOIO);
} }
static void free_prison_cell(struct cache *cache, struct dm_bio_prison_cell_v2 *cell) static void free_prison_cell(struct cache *cache, struct dm_bio_prison_cell_v2 *cell)
...@@ -554,9 +554,7 @@ static struct dm_cache_migration *alloc_migration(struct cache *cache) ...@@ -554,9 +554,7 @@ static struct dm_cache_migration *alloc_migration(struct cache *cache)
{ {
struct dm_cache_migration *mg; struct dm_cache_migration *mg;
mg = mempool_alloc(&cache->migration_pool, GFP_NOWAIT); mg = mempool_alloc(&cache->migration_pool, GFP_NOIO);
if (!mg)
return NULL;
memset(mg, 0, sizeof(*mg)); memset(mg, 0, sizeof(*mg));
...@@ -664,10 +662,6 @@ static bool bio_detain_shared(struct cache *cache, dm_oblock_t oblock, struct bi ...@@ -664,10 +662,6 @@ static bool bio_detain_shared(struct cache *cache, dm_oblock_t oblock, struct bi
struct dm_bio_prison_cell_v2 *cell_prealloc, *cell; struct dm_bio_prison_cell_v2 *cell_prealloc, *cell;
cell_prealloc = alloc_prison_cell(cache); /* FIXME: allow wait if calling from worker */ cell_prealloc = alloc_prison_cell(cache); /* FIXME: allow wait if calling from worker */
if (!cell_prealloc) {
defer_bio(cache, bio);
return false;
}
build_key(oblock, end, &key); build_key(oblock, end, &key);
r = dm_cell_get_v2(cache->prison, &key, lock_level(bio), bio, cell_prealloc, &cell); r = dm_cell_get_v2(cache->prison, &key, lock_level(bio), bio, cell_prealloc, &cell);
...@@ -1493,11 +1487,6 @@ static int mg_lock_writes(struct dm_cache_migration *mg) ...@@ -1493,11 +1487,6 @@ static int mg_lock_writes(struct dm_cache_migration *mg)
struct dm_bio_prison_cell_v2 *prealloc; struct dm_bio_prison_cell_v2 *prealloc;
prealloc = alloc_prison_cell(cache); prealloc = alloc_prison_cell(cache);
if (!prealloc) {
DMERR_LIMIT("%s: alloc_prison_cell failed", cache_device_name(cache));
mg_complete(mg, false);
return -ENOMEM;
}
/* /*
* Prevent writes to the block, but allow reads to continue. * Prevent writes to the block, but allow reads to continue.
...@@ -1535,11 +1524,6 @@ static int mg_start(struct cache *cache, struct policy_work *op, struct bio *bio ...@@ -1535,11 +1524,6 @@ static int mg_start(struct cache *cache, struct policy_work *op, struct bio *bio
} }
mg = alloc_migration(cache); mg = alloc_migration(cache);
if (!mg) {
policy_complete_background_work(cache->policy, op, false);
background_work_end(cache);
return -ENOMEM;
}
mg->op = op; mg->op = op;
mg->overwrite_bio = bio; mg->overwrite_bio = bio;
...@@ -1628,10 +1612,6 @@ static int invalidate_lock(struct dm_cache_migration *mg) ...@@ -1628,10 +1612,6 @@ static int invalidate_lock(struct dm_cache_migration *mg)
struct dm_bio_prison_cell_v2 *prealloc; struct dm_bio_prison_cell_v2 *prealloc;
prealloc = alloc_prison_cell(cache); prealloc = alloc_prison_cell(cache);
if (!prealloc) {
invalidate_complete(mg, false);
return -ENOMEM;
}
build_key(mg->invalidate_oblock, oblock_succ(mg->invalidate_oblock), &key); build_key(mg->invalidate_oblock, oblock_succ(mg->invalidate_oblock), &key);
r = dm_cell_lock_v2(cache->prison, &key, r = dm_cell_lock_v2(cache->prison, &key,
...@@ -1669,10 +1649,6 @@ static int invalidate_start(struct cache *cache, dm_cblock_t cblock, ...@@ -1669,10 +1649,6 @@ static int invalidate_start(struct cache *cache, dm_cblock_t cblock,
return -EPERM; return -EPERM;
mg = alloc_migration(cache); mg = alloc_migration(cache);
if (!mg) {
background_work_end(cache);
return -ENOMEM;
}
mg->overwrite_bio = bio; mg->overwrite_bio = bio;
mg->invalidate_cblock = cblock; mg->invalidate_cblock = cblock;
......
...@@ -591,7 +591,7 @@ static struct hash_table_bucket *get_hash_table_bucket(struct clone *clone, ...@@ -591,7 +591,7 @@ static struct hash_table_bucket *get_hash_table_bucket(struct clone *clone,
* *
* NOTE: Must be called with the bucket lock held * NOTE: Must be called with the bucket lock held
*/ */
struct dm_clone_region_hydration *__hash_find(struct hash_table_bucket *bucket, static struct dm_clone_region_hydration *__hash_find(struct hash_table_bucket *bucket,
unsigned long region_nr) unsigned long region_nr)
{ {
struct dm_clone_region_hydration *hd; struct dm_clone_region_hydration *hd;
......
...@@ -18,7 +18,6 @@ ...@@ -18,7 +18,6 @@
#include <linux/vmalloc.h> #include <linux/vmalloc.h>
#include <linux/log2.h> #include <linux/log2.h>
#include <linux/dm-kcopyd.h> #include <linux/dm-kcopyd.h>
#include <linux/semaphore.h>
#include "dm.h" #include "dm.h"
...@@ -107,8 +106,8 @@ struct dm_snapshot { ...@@ -107,8 +106,8 @@ struct dm_snapshot {
/* The on disk metadata handler */ /* The on disk metadata handler */
struct dm_exception_store *store; struct dm_exception_store *store;
/* Maximum number of in-flight COW jobs. */ unsigned in_progress;
struct semaphore cow_count; struct wait_queue_head in_progress_wait;
struct dm_kcopyd_client *kcopyd_client; struct dm_kcopyd_client *kcopyd_client;
...@@ -162,8 +161,8 @@ struct dm_snapshot { ...@@ -162,8 +161,8 @@ struct dm_snapshot {
*/ */
#define DEFAULT_COW_THRESHOLD 2048 #define DEFAULT_COW_THRESHOLD 2048
static int cow_threshold = DEFAULT_COW_THRESHOLD; static unsigned cow_threshold = DEFAULT_COW_THRESHOLD;
module_param_named(snapshot_cow_threshold, cow_threshold, int, 0644); module_param_named(snapshot_cow_threshold, cow_threshold, uint, 0644);
MODULE_PARM_DESC(snapshot_cow_threshold, "Maximum number of chunks being copied on write"); MODULE_PARM_DESC(snapshot_cow_threshold, "Maximum number of chunks being copied on write");
DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(snapshot_copy_throttle, DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(snapshot_copy_throttle,
...@@ -1327,7 +1326,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) ...@@ -1327,7 +1326,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
goto bad_hash_tables; goto bad_hash_tables;
} }
sema_init(&s->cow_count, (cow_threshold > 0) ? cow_threshold : INT_MAX); init_waitqueue_head(&s->in_progress_wait);
s->kcopyd_client = dm_kcopyd_client_create(&dm_kcopyd_throttle); s->kcopyd_client = dm_kcopyd_client_create(&dm_kcopyd_throttle);
if (IS_ERR(s->kcopyd_client)) { if (IS_ERR(s->kcopyd_client)) {
...@@ -1509,9 +1508,56 @@ static void snapshot_dtr(struct dm_target *ti) ...@@ -1509,9 +1508,56 @@ static void snapshot_dtr(struct dm_target *ti)
dm_put_device(ti, s->origin); dm_put_device(ti, s->origin);
WARN_ON(s->in_progress);
kfree(s); kfree(s);
} }
/*
 * Record that one more copy job is in flight for snapshot @s.
 *
 * Increments s->in_progress while holding the spinlock embedded in
 * s->in_progress_wait, the same lock account_end_copy() and
 * wait_for_in_progress() take when they examine the counter.
 */
static void account_start_copy(struct dm_snapshot *s)
{
spin_lock(&s->in_progress_wait.lock);
s->in_progress++;
spin_unlock(&s->in_progress_wait.lock);
}
/*
 * Record that one copy job for snapshot @s has finished.
 *
 * Decrements s->in_progress under the in_progress_wait spinlock and, once
 * the count is at or below cow_threshold, wakes tasks sleeping in
 * wait_for_in_progress().
 */
static void account_end_copy(struct dm_snapshot *s)
{
spin_lock(&s->in_progress_wait.lock);
/* An end without a matching start would underflow the unsigned counter. */
BUG_ON(!s->in_progress);
s->in_progress--;
/*
 * Only wake when the count is back within the threshold and a waiter is
 * actually queued; the wait-queue lock is already held here, hence the
 * _locked wake-up variant.
 */
if (likely(s->in_progress <= cow_threshold) &&
unlikely(waitqueue_active(&s->in_progress_wait)))
wake_up_locked(&s->in_progress_wait);
spin_unlock(&s->in_progress_wait.lock);
}
/*
 * Throttle the caller while snapshot @s has more than cow_threshold copy
 * jobs in flight.
 *
 * @s:              snapshot whose in_progress counter is checked.
 * @unlock_origins: if true, the read side of _origins_lock is released
 *                  before sleeping; it is NOT re-acquired here, so the
 *                  caller must take it again before retrying.
 *
 * Returns true if no throttling was needed (the function did not sleep).
 * Returns false if the task slept once; the caller is expected to call
 * again to re-evaluate the condition.
 *
 * NOTE(review): cow_threshold is compared directly, so a value of 0 makes
 * the condition true whenever any copy is in flight - confirm this is the
 * intended meaning of 0.
 */
static bool wait_for_in_progress(struct dm_snapshot *s, bool unlock_origins)
{
/* Cheap check without the lock; re-checked under the lock below. */
if (unlikely(s->in_progress > cow_threshold)) {
spin_lock(&s->in_progress_wait.lock);
if (likely(s->in_progress > cow_threshold)) {
/*
* NOTE: this throttle doesn't account for whether
* the caller is servicing an IO that will trigger a COW
* so excess throttling may result for chunks not required
* to be COW'd. But if cow_threshold was reached, extra
* throttling is unlikely to negatively impact performance.
*/
DECLARE_WAITQUEUE(wait, current);
/*
 * Queue ourselves and mark the task as sleeping while still
 * holding the lock, so a wake-up issued by account_end_copy()
 * after we unlock cannot be missed.
 */
__add_wait_queue(&s->in_progress_wait, &wait);
__set_current_state(TASK_UNINTERRUPTIBLE);
spin_unlock(&s->in_progress_wait.lock);
/* Drop the origins lock (if requested) so it is not held across the sleep. */
if (unlock_origins)
up_read(&_origins_lock);
io_schedule();
remove_wait_queue(&s->in_progress_wait, &wait);
return false;
}
spin_unlock(&s->in_progress_wait.lock);
}
return true;
}
/* /*
* Flush a list of buffers. * Flush a list of buffers.
*/ */
...@@ -1527,7 +1573,7 @@ static void flush_bios(struct bio *bio) ...@@ -1527,7 +1573,7 @@ static void flush_bios(struct bio *bio)
} }
} }
static int do_origin(struct dm_dev *origin, struct bio *bio); static int do_origin(struct dm_dev *origin, struct bio *bio, bool limit);
/* /*
* Flush a list of buffers. * Flush a list of buffers.
...@@ -1540,7 +1586,7 @@ static void retry_origin_bios(struct dm_snapshot *s, struct bio *bio) ...@@ -1540,7 +1586,7 @@ static void retry_origin_bios(struct dm_snapshot *s, struct bio *bio)
while (bio) { while (bio) {
n = bio->bi_next; n = bio->bi_next;
bio->bi_next = NULL; bio->bi_next = NULL;
r = do_origin(s->origin, bio); r = do_origin(s->origin, bio, false);
if (r == DM_MAPIO_REMAPPED) if (r == DM_MAPIO_REMAPPED)
generic_make_request(bio); generic_make_request(bio);
bio = n; bio = n;
...@@ -1732,7 +1778,7 @@ static void copy_callback(int read_err, unsigned long write_err, void *context) ...@@ -1732,7 +1778,7 @@ static void copy_callback(int read_err, unsigned long write_err, void *context)
rb_link_node(&pe->out_of_order_node, parent, p); rb_link_node(&pe->out_of_order_node, parent, p);
rb_insert_color(&pe->out_of_order_node, &s->out_of_order_tree); rb_insert_color(&pe->out_of_order_node, &s->out_of_order_tree);
} }
up(&s->cow_count); account_end_copy(s);
} }
/* /*
...@@ -1756,7 +1802,7 @@ static void start_copy(struct dm_snap_pending_exception *pe) ...@@ -1756,7 +1802,7 @@ static void start_copy(struct dm_snap_pending_exception *pe)
dest.count = src.count; dest.count = src.count;
/* Hand over to kcopyd */ /* Hand over to kcopyd */
down(&s->cow_count); account_start_copy(s);
dm_kcopyd_copy(s->kcopyd_client, &src, 1, &dest, 0, copy_callback, pe); dm_kcopyd_copy(s->kcopyd_client, &src, 1, &dest, 0, copy_callback, pe);
} }
...@@ -1776,7 +1822,7 @@ static void start_full_bio(struct dm_snap_pending_exception *pe, ...@@ -1776,7 +1822,7 @@ static void start_full_bio(struct dm_snap_pending_exception *pe,
pe->full_bio = bio; pe->full_bio = bio;
pe->full_bio_end_io = bio->bi_end_io; pe->full_bio_end_io = bio->bi_end_io;
down(&s->cow_count); account_start_copy(s);
callback_data = dm_kcopyd_prepare_callback(s->kcopyd_client, callback_data = dm_kcopyd_prepare_callback(s->kcopyd_client,
copy_callback, pe); copy_callback, pe);
...@@ -1866,7 +1912,7 @@ static void zero_callback(int read_err, unsigned long write_err, void *context) ...@@ -1866,7 +1912,7 @@ static void zero_callback(int read_err, unsigned long write_err, void *context)
struct bio *bio = context; struct bio *bio = context;
struct dm_snapshot *s = bio->bi_private; struct dm_snapshot *s = bio->bi_private;
up(&s->cow_count); account_end_copy(s);
bio->bi_status = write_err ? BLK_STS_IOERR : 0; bio->bi_status = write_err ? BLK_STS_IOERR : 0;
bio_endio(bio); bio_endio(bio);
} }
...@@ -1880,7 +1926,7 @@ static void zero_exception(struct dm_snapshot *s, struct dm_exception *e, ...@@ -1880,7 +1926,7 @@ static void zero_exception(struct dm_snapshot *s, struct dm_exception *e,
dest.sector = bio->bi_iter.bi_sector; dest.sector = bio->bi_iter.bi_sector;
dest.count = s->store->chunk_size; dest.count = s->store->chunk_size;
down(&s->cow_count); account_start_copy(s);
WARN_ON_ONCE(bio->bi_private); WARN_ON_ONCE(bio->bi_private);
bio->bi_private = s; bio->bi_private = s;
dm_kcopyd_zero(s->kcopyd_client, 1, &dest, 0, zero_callback, bio); dm_kcopyd_zero(s->kcopyd_client, 1, &dest, 0, zero_callback, bio);
...@@ -1916,6 +1962,11 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio) ...@@ -1916,6 +1962,11 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio)
if (!s->valid) if (!s->valid)
return DM_MAPIO_KILL; return DM_MAPIO_KILL;
if (bio_data_dir(bio) == WRITE) {
while (unlikely(!wait_for_in_progress(s, false)))
; /* wait_for_in_progress() has slept */
}
down_read(&s->lock); down_read(&s->lock);
dm_exception_table_lock(&lock); dm_exception_table_lock(&lock);
...@@ -2112,7 +2163,7 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio) ...@@ -2112,7 +2163,7 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio)
if (bio_data_dir(bio) == WRITE) { if (bio_data_dir(bio) == WRITE) {
up_write(&s->lock); up_write(&s->lock);
return do_origin(s->origin, bio); return do_origin(s->origin, bio, false);
} }
out_unlock: out_unlock:
...@@ -2487,15 +2538,24 @@ static int __origin_write(struct list_head *snapshots, sector_t sector, ...@@ -2487,15 +2538,24 @@ static int __origin_write(struct list_head *snapshots, sector_t sector,
/* /*
* Called on a write from the origin driver. * Called on a write from the origin driver.
*/ */
static int do_origin(struct dm_dev *origin, struct bio *bio) static int do_origin(struct dm_dev *origin, struct bio *bio, bool limit)
{ {
struct origin *o; struct origin *o;
int r = DM_MAPIO_REMAPPED; int r = DM_MAPIO_REMAPPED;
again:
down_read(&_origins_lock); down_read(&_origins_lock);
o = __lookup_origin(origin->bdev); o = __lookup_origin(origin->bdev);
if (o) if (o) {
if (limit) {
struct dm_snapshot *s;
list_for_each_entry(s, &o->snapshots, list)
if (unlikely(!wait_for_in_progress(s, true)))
goto again;
}
r = __origin_write(&o->snapshots, bio->bi_iter.bi_sector, bio); r = __origin_write(&o->snapshots, bio->bi_iter.bi_sector, bio);
}
up_read(&_origins_lock); up_read(&_origins_lock);
return r; return r;
...@@ -2608,7 +2668,7 @@ static int origin_map(struct dm_target *ti, struct bio *bio) ...@@ -2608,7 +2668,7 @@ static int origin_map(struct dm_target *ti, struct bio *bio)
dm_accept_partial_bio(bio, available_sectors); dm_accept_partial_bio(bio, available_sectors);
/* Only tell snapshots if this is a write */ /* Only tell snapshots if this is a write */
return do_origin(o->dev, bio); return do_origin(o->dev, bio, true);
} }
/* /*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment