Commit f97809ae authored by Hannes Reinecke's avatar Hannes Reinecke Committed by Mike Snitzer

dm zoned: per-device reclaim

Instead of having one reclaim workqueue for the entire set we should
be allocating a reclaim workqueue per device; doing so will reduce
contention and should boost performance for a multi-device setup.
Signed-off-by: default avatarHannes Reinecke <hare@suse.de>
Reviewed-by: default avatarDamien Le Moal <damien.lemoal@wdc.com>
Signed-off-by: default avatarMike Snitzer <snitzer@redhat.com>
parent 18979819
...@@ -21,6 +21,8 @@ struct dmz_reclaim { ...@@ -21,6 +21,8 @@ struct dmz_reclaim {
struct dm_kcopyd_throttle kc_throttle; struct dm_kcopyd_throttle kc_throttle;
int kc_err; int kc_err;
int dev_idx;
unsigned long flags; unsigned long flags;
/* Last target access time */ /* Last target access time */
...@@ -198,8 +200,8 @@ static int dmz_reclaim_buf(struct dmz_reclaim *zrc, struct dm_zone *dzone) ...@@ -198,8 +200,8 @@ static int dmz_reclaim_buf(struct dmz_reclaim *zrc, struct dm_zone *dzone)
struct dmz_metadata *zmd = zrc->metadata; struct dmz_metadata *zmd = zrc->metadata;
int ret; int ret;
DMDEBUG("(%s): Chunk %u, move buf zone %u (weight %u) to data zone %u (weight %u)", DMDEBUG("(%s/%u): Chunk %u, move buf zone %u (weight %u) to data zone %u (weight %u)",
dmz_metadata_label(zmd), dmz_metadata_label(zmd), zrc->dev_idx,
dzone->chunk, bzone->id, dmz_weight(bzone), dzone->chunk, bzone->id, dmz_weight(bzone),
dzone->id, dmz_weight(dzone)); dzone->id, dmz_weight(dzone));
...@@ -237,8 +239,8 @@ static int dmz_reclaim_seq_data(struct dmz_reclaim *zrc, struct dm_zone *dzone) ...@@ -237,8 +239,8 @@ static int dmz_reclaim_seq_data(struct dmz_reclaim *zrc, struct dm_zone *dzone)
struct dmz_metadata *zmd = zrc->metadata; struct dmz_metadata *zmd = zrc->metadata;
int ret = 0; int ret = 0;
DMDEBUG("(%s): Chunk %u, move data zone %u (weight %u) to buf zone %u (weight %u)", DMDEBUG("(%s/%u): Chunk %u, move data zone %u (weight %u) to buf zone %u (weight %u)",
dmz_metadata_label(zmd), dmz_metadata_label(zmd), zrc->dev_idx,
chunk, dzone->id, dmz_weight(dzone), chunk, dzone->id, dmz_weight(dzone),
bzone->id, dmz_weight(bzone)); bzone->id, dmz_weight(bzone));
...@@ -295,8 +297,8 @@ static int dmz_reclaim_rnd_data(struct dmz_reclaim *zrc, struct dm_zone *dzone) ...@@ -295,8 +297,8 @@ static int dmz_reclaim_rnd_data(struct dmz_reclaim *zrc, struct dm_zone *dzone)
if (!szone) if (!szone)
return -ENOSPC; return -ENOSPC;
DMDEBUG("(%s): Chunk %u, move %s zone %u (weight %u) to %s zone %u", DMDEBUG("(%s/%u): Chunk %u, move %s zone %u (weight %u) to %s zone %u",
dmz_metadata_label(zmd), chunk, dmz_metadata_label(zmd), zrc->dev_idx, chunk,
dmz_is_cache(dzone) ? "cache" : "rnd", dmz_is_cache(dzone) ? "cache" : "rnd",
dzone->id, dmz_weight(dzone), dzone->id, dmz_weight(dzone),
dmz_is_rnd(szone) ? "rnd" : "seq", szone->id); dmz_is_rnd(szone) ? "rnd" : "seq", szone->id);
...@@ -369,8 +371,8 @@ static int dmz_do_reclaim(struct dmz_reclaim *zrc) ...@@ -369,8 +371,8 @@ static int dmz_do_reclaim(struct dmz_reclaim *zrc)
/* Get a data zone */ /* Get a data zone */
dzone = dmz_get_zone_for_reclaim(zmd, dmz_target_idle(zrc)); dzone = dmz_get_zone_for_reclaim(zmd, dmz_target_idle(zrc));
if (!dzone) { if (!dzone) {
DMDEBUG("(%s): No zone found to reclaim", DMDEBUG("(%s/%u): No zone found to reclaim",
dmz_metadata_label(zmd)); dmz_metadata_label(zmd), zrc->dev_idx);
return -EBUSY; return -EBUSY;
} }
...@@ -417,24 +419,26 @@ static int dmz_do_reclaim(struct dmz_reclaim *zrc) ...@@ -417,24 +419,26 @@ static int dmz_do_reclaim(struct dmz_reclaim *zrc)
out: out:
if (ret) { if (ret) {
if (ret == -EINTR) if (ret == -EINTR)
DMDEBUG("(%s): reclaim zone %u interrupted", DMDEBUG("(%s/%u): reclaim zone %u interrupted",
dmz_metadata_label(zmd), rzone->id); dmz_metadata_label(zmd), zrc->dev_idx,
rzone->id);
else else
DMDEBUG("(%s): Failed to reclaim zone %u, err %d", DMDEBUG("(%s/%u): Failed to reclaim zone %u, err %d",
dmz_metadata_label(zmd), rzone->id, ret); dmz_metadata_label(zmd), zrc->dev_idx,
rzone->id, ret);
dmz_unlock_zone_reclaim(dzone); dmz_unlock_zone_reclaim(dzone);
return ret; return ret;
} }
ret = dmz_flush_metadata(zrc->metadata); ret = dmz_flush_metadata(zrc->metadata);
if (ret) { if (ret) {
DMDEBUG("(%s): Metadata flush for zone %u failed, err %d", DMDEBUG("(%s/%u): Metadata flush for zone %u failed, err %d",
dmz_metadata_label(zmd), rzone->id, ret); dmz_metadata_label(zmd), zrc->dev_idx, rzone->id, ret);
return ret; return ret;
} }
DMDEBUG("(%s): Reclaimed zone %u in %u ms", DMDEBUG("(%s/%u): Reclaimed zone %u in %u ms",
dmz_metadata_label(zmd), dmz_metadata_label(zmd), zrc->dev_idx,
rzone->id, jiffies_to_msecs(jiffies - start)); rzone->id, jiffies_to_msecs(jiffies - start));
return 0; return 0;
} }
...@@ -461,10 +465,20 @@ static unsigned int dmz_reclaim_percentage(struct dmz_reclaim *zrc) ...@@ -461,10 +465,20 @@ static unsigned int dmz_reclaim_percentage(struct dmz_reclaim *zrc)
*/ */
static bool dmz_should_reclaim(struct dmz_reclaim *zrc, unsigned int p_unmap) static bool dmz_should_reclaim(struct dmz_reclaim *zrc, unsigned int p_unmap)
{ {
unsigned int nr_reclaim = dmz_nr_rnd_zones(zrc->metadata); unsigned int nr_reclaim;
nr_reclaim = dmz_nr_rnd_zones(zrc->metadata);
if (dmz_nr_cache_zones(zrc->metadata)) if (dmz_nr_cache_zones(zrc->metadata)) {
/*
* The first device in a multi-device
* setup only contains cache zones, so
* never start reclaim there.
*/
if (zrc->dev_idx == 0)
return false;
nr_reclaim += dmz_nr_cache_zones(zrc->metadata); nr_reclaim += dmz_nr_cache_zones(zrc->metadata);
}
/* Reclaim when idle */ /* Reclaim when idle */
if (dmz_target_idle(zrc) && nr_reclaim) if (dmz_target_idle(zrc) && nr_reclaim)
...@@ -488,7 +502,7 @@ static void dmz_reclaim_work(struct work_struct *work) ...@@ -488,7 +502,7 @@ static void dmz_reclaim_work(struct work_struct *work)
{ {
struct dmz_reclaim *zrc = container_of(work, struct dmz_reclaim, work.work); struct dmz_reclaim *zrc = container_of(work, struct dmz_reclaim, work.work);
struct dmz_metadata *zmd = zrc->metadata; struct dmz_metadata *zmd = zrc->metadata;
unsigned int p_unmap; unsigned int p_unmap, nr_unmap_rnd = 0, nr_rnd = 0;
int ret; int ret;
if (dmz_dev_is_dying(zmd)) if (dmz_dev_is_dying(zmd))
...@@ -514,8 +528,11 @@ static void dmz_reclaim_work(struct work_struct *work) ...@@ -514,8 +528,11 @@ static void dmz_reclaim_work(struct work_struct *work)
zrc->kc_throttle.throttle = min(75U, 100U - p_unmap / 2); zrc->kc_throttle.throttle = min(75U, 100U - p_unmap / 2);
} }
DMDEBUG("(%s): Reclaim (%u): %s, %u%% free zones (%u/%u cache %u/%u random)", nr_unmap_rnd = dmz_nr_unmap_rnd_zones(zmd);
dmz_metadata_label(zmd), nr_rnd = dmz_nr_rnd_zones(zmd);
DMDEBUG("(%s/%u): Reclaim (%u): %s, %u%% free zones (%u/%u cache %u/%u random)",
dmz_metadata_label(zmd), zrc->dev_idx,
zrc->kc_throttle.throttle, zrc->kc_throttle.throttle,
(dmz_target_idle(zrc) ? "Idle" : "Busy"), (dmz_target_idle(zrc) ? "Idle" : "Busy"),
p_unmap, dmz_nr_unmap_cache_zones(zmd), p_unmap, dmz_nr_unmap_cache_zones(zmd),
...@@ -536,7 +553,7 @@ static void dmz_reclaim_work(struct work_struct *work) ...@@ -536,7 +553,7 @@ static void dmz_reclaim_work(struct work_struct *work)
* Initialize reclaim. * Initialize reclaim.
*/ */
int dmz_ctr_reclaim(struct dmz_metadata *zmd, int dmz_ctr_reclaim(struct dmz_metadata *zmd,
struct dmz_reclaim **reclaim) struct dmz_reclaim **reclaim, int idx)
{ {
struct dmz_reclaim *zrc; struct dmz_reclaim *zrc;
int ret; int ret;
...@@ -547,6 +564,7 @@ int dmz_ctr_reclaim(struct dmz_metadata *zmd, ...@@ -547,6 +564,7 @@ int dmz_ctr_reclaim(struct dmz_metadata *zmd,
zrc->metadata = zmd; zrc->metadata = zmd;
zrc->atime = jiffies; zrc->atime = jiffies;
zrc->dev_idx = idx;
/* Reclaim kcopyd client */ /* Reclaim kcopyd client */
zrc->kc = dm_kcopyd_client_create(&zrc->kc_throttle); zrc->kc = dm_kcopyd_client_create(&zrc->kc_throttle);
...@@ -558,8 +576,8 @@ int dmz_ctr_reclaim(struct dmz_metadata *zmd, ...@@ -558,8 +576,8 @@ int dmz_ctr_reclaim(struct dmz_metadata *zmd,
/* Reclaim work */ /* Reclaim work */
INIT_DELAYED_WORK(&zrc->work, dmz_reclaim_work); INIT_DELAYED_WORK(&zrc->work, dmz_reclaim_work);
zrc->wq = alloc_ordered_workqueue("dmz_rwq_%s", WQ_MEM_RECLAIM, zrc->wq = alloc_ordered_workqueue("dmz_rwq_%s_%d", WQ_MEM_RECLAIM,
dmz_metadata_label(zmd)); dmz_metadata_label(zmd), idx);
if (!zrc->wq) { if (!zrc->wq) {
ret = -ENOMEM; ret = -ENOMEM;
goto err; goto err;
......
...@@ -41,6 +41,7 @@ struct dm_chunk_work { ...@@ -41,6 +41,7 @@ struct dm_chunk_work {
*/ */
struct dmz_target { struct dmz_target {
struct dm_dev *ddev[DMZ_MAX_DEVS]; struct dm_dev *ddev[DMZ_MAX_DEVS];
unsigned int nr_ddevs;
unsigned long flags; unsigned long flags;
...@@ -50,9 +51,6 @@ struct dmz_target { ...@@ -50,9 +51,6 @@ struct dmz_target {
/* For metadata handling */ /* For metadata handling */
struct dmz_metadata *metadata; struct dmz_metadata *metadata;
/* For reclaim */
struct dmz_reclaim *reclaim;
/* For chunk work */ /* For chunk work */
struct radix_tree_root chunk_rxtree; struct radix_tree_root chunk_rxtree;
struct workqueue_struct *chunk_wq; struct workqueue_struct *chunk_wq;
...@@ -404,14 +402,15 @@ static void dmz_handle_bio(struct dmz_target *dmz, struct dm_chunk_work *cw, ...@@ -404,14 +402,15 @@ static void dmz_handle_bio(struct dmz_target *dmz, struct dm_chunk_work *cw,
dm_per_bio_data(bio, sizeof(struct dmz_bioctx)); dm_per_bio_data(bio, sizeof(struct dmz_bioctx));
struct dmz_metadata *zmd = dmz->metadata; struct dmz_metadata *zmd = dmz->metadata;
struct dm_zone *zone; struct dm_zone *zone;
int ret; int i, ret;
/* /*
* Write may trigger a zone allocation. So make sure the * Write may trigger a zone allocation. So make sure the
* allocation can succeed. * allocation can succeed.
*/ */
if (bio_op(bio) == REQ_OP_WRITE) if (bio_op(bio) == REQ_OP_WRITE)
dmz_schedule_reclaim(dmz->reclaim); for (i = 0; i < dmz->nr_ddevs; i++)
dmz_schedule_reclaim(dmz->dev[i].reclaim);
dmz_lock_metadata(zmd); dmz_lock_metadata(zmd);
...@@ -431,6 +430,7 @@ static void dmz_handle_bio(struct dmz_target *dmz, struct dm_chunk_work *cw, ...@@ -431,6 +430,7 @@ static void dmz_handle_bio(struct dmz_target *dmz, struct dm_chunk_work *cw,
if (zone) { if (zone) {
dmz_activate_zone(zone); dmz_activate_zone(zone);
bioctx->zone = zone; bioctx->zone = zone;
dmz_reclaim_bio_acc(zone->dev->reclaim);
} }
switch (bio_op(bio)) { switch (bio_op(bio)) {
...@@ -577,7 +577,6 @@ static int dmz_queue_chunk_work(struct dmz_target *dmz, struct bio *bio) ...@@ -577,7 +577,6 @@ static int dmz_queue_chunk_work(struct dmz_target *dmz, struct bio *bio)
bio_list_add(&cw->bio_list, bio); bio_list_add(&cw->bio_list, bio);
dmz_reclaim_bio_acc(dmz->reclaim);
if (queue_work(dmz->chunk_wq, &cw->work)) if (queue_work(dmz->chunk_wq, &cw->work))
dmz_get_chunk_work(cw); dmz_get_chunk_work(cw);
out: out:
...@@ -822,7 +821,7 @@ static int dmz_fixup_devices(struct dm_target *ti) ...@@ -822,7 +821,7 @@ static int dmz_fixup_devices(struct dm_target *ti)
static int dmz_ctr(struct dm_target *ti, unsigned int argc, char **argv) static int dmz_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{ {
struct dmz_target *dmz; struct dmz_target *dmz;
int ret; int ret, i;
/* Check arguments */ /* Check arguments */
if (argc < 1 || argc > 2) { if (argc < 1 || argc > 2) {
...@@ -842,6 +841,7 @@ static int dmz_ctr(struct dm_target *ti, unsigned int argc, char **argv) ...@@ -842,6 +841,7 @@ static int dmz_ctr(struct dm_target *ti, unsigned int argc, char **argv)
kfree(dmz); kfree(dmz);
return -ENOMEM; return -ENOMEM;
} }
dmz->nr_ddevs = argc;
ti->private = dmz; ti->private = dmz;
/* Get the target zoned block device */ /* Get the target zoned block device */
...@@ -916,10 +916,12 @@ static int dmz_ctr(struct dm_target *ti, unsigned int argc, char **argv) ...@@ -916,10 +916,12 @@ static int dmz_ctr(struct dm_target *ti, unsigned int argc, char **argv)
mod_delayed_work(dmz->flush_wq, &dmz->flush_work, DMZ_FLUSH_PERIOD); mod_delayed_work(dmz->flush_wq, &dmz->flush_work, DMZ_FLUSH_PERIOD);
/* Initialize reclaim */ /* Initialize reclaim */
ret = dmz_ctr_reclaim(dmz->metadata, &dmz->reclaim); for (i = 0; i < dmz->nr_ddevs; i++) {
if (ret) { ret = dmz_ctr_reclaim(dmz->metadata, &dmz->dev[i].reclaim, i);
ti->error = "Zone reclaim initialization failed"; if (ret) {
goto err_fwq; ti->error = "Zone reclaim initialization failed";
goto err_fwq;
}
} }
DMINFO("(%s): Target device: %llu 512-byte logical sectors (%llu blocks)", DMINFO("(%s): Target device: %llu 512-byte logical sectors (%llu blocks)",
...@@ -952,11 +954,13 @@ static int dmz_ctr(struct dm_target *ti, unsigned int argc, char **argv) ...@@ -952,11 +954,13 @@ static int dmz_ctr(struct dm_target *ti, unsigned int argc, char **argv)
static void dmz_dtr(struct dm_target *ti) static void dmz_dtr(struct dm_target *ti)
{ {
struct dmz_target *dmz = ti->private; struct dmz_target *dmz = ti->private;
int i;
flush_workqueue(dmz->chunk_wq); flush_workqueue(dmz->chunk_wq);
destroy_workqueue(dmz->chunk_wq); destroy_workqueue(dmz->chunk_wq);
dmz_dtr_reclaim(dmz->reclaim); for (i = 0; i < dmz->nr_ddevs; i++)
dmz_dtr_reclaim(dmz->dev[i].reclaim);
cancel_delayed_work_sync(&dmz->flush_work); cancel_delayed_work_sync(&dmz->flush_work);
destroy_workqueue(dmz->flush_wq); destroy_workqueue(dmz->flush_wq);
...@@ -1025,9 +1029,11 @@ static int dmz_prepare_ioctl(struct dm_target *ti, struct block_device **bdev) ...@@ -1025,9 +1029,11 @@ static int dmz_prepare_ioctl(struct dm_target *ti, struct block_device **bdev)
static void dmz_suspend(struct dm_target *ti) static void dmz_suspend(struct dm_target *ti)
{ {
struct dmz_target *dmz = ti->private; struct dmz_target *dmz = ti->private;
int i;
flush_workqueue(dmz->chunk_wq); flush_workqueue(dmz->chunk_wq);
dmz_suspend_reclaim(dmz->reclaim); for (i = 0; i < dmz->nr_ddevs; i++)
dmz_suspend_reclaim(dmz->dev[i].reclaim);
cancel_delayed_work_sync(&dmz->flush_work); cancel_delayed_work_sync(&dmz->flush_work);
} }
...@@ -1037,9 +1043,11 @@ static void dmz_suspend(struct dm_target *ti) ...@@ -1037,9 +1043,11 @@ static void dmz_suspend(struct dm_target *ti)
static void dmz_resume(struct dm_target *ti) static void dmz_resume(struct dm_target *ti)
{ {
struct dmz_target *dmz = ti->private; struct dmz_target *dmz = ti->private;
int i;
queue_delayed_work(dmz->flush_wq, &dmz->flush_work, DMZ_FLUSH_PERIOD); queue_delayed_work(dmz->flush_wq, &dmz->flush_work, DMZ_FLUSH_PERIOD);
dmz_resume_reclaim(dmz->reclaim); for (i = 0; i < dmz->nr_ddevs; i++)
dmz_resume_reclaim(dmz->dev[i].reclaim);
} }
static int dmz_iterate_devices(struct dm_target *ti, static int dmz_iterate_devices(struct dm_target *ti,
...@@ -1100,7 +1108,10 @@ static int dmz_message(struct dm_target *ti, unsigned int argc, char **argv, ...@@ -1100,7 +1108,10 @@ static int dmz_message(struct dm_target *ti, unsigned int argc, char **argv,
int r = -EINVAL; int r = -EINVAL;
if (!strcasecmp(argv[0], "reclaim")) { if (!strcasecmp(argv[0], "reclaim")) {
dmz_schedule_reclaim(dmz->reclaim); int i;
for (i = 0; i < dmz->nr_ddevs; i++)
dmz_schedule_reclaim(dmz->dev[i].reclaim);
r = 0; r = 0;
} else } else
DMERR("unrecognized message %s", argv[0]); DMERR("unrecognized message %s", argv[0]);
......
...@@ -54,6 +54,7 @@ struct dmz_reclaim; ...@@ -54,6 +54,7 @@ struct dmz_reclaim;
struct dmz_dev { struct dmz_dev {
struct block_device *bdev; struct block_device *bdev;
struct dmz_metadata *metadata; struct dmz_metadata *metadata;
struct dmz_reclaim *reclaim;
char name[BDEVNAME_SIZE]; char name[BDEVNAME_SIZE];
uuid_t uuid; uuid_t uuid;
...@@ -229,23 +230,6 @@ static inline void dmz_activate_zone(struct dm_zone *zone) ...@@ -229,23 +230,6 @@ static inline void dmz_activate_zone(struct dm_zone *zone)
atomic_inc(&zone->refcount); atomic_inc(&zone->refcount);
} }
/*
* Deactivate a zone. This decrement the zone reference counter
* indicating that all BIOs to the zone have completed when the count is 0.
*/
static inline void dmz_deactivate_zone(struct dm_zone *zone)
{
atomic_dec(&zone->refcount);
}
/*
* Test if a zone is active, that is, has a refcount > 0.
*/
static inline bool dmz_is_active(struct dm_zone *zone)
{
return atomic_read(&zone->refcount);
}
int dmz_lock_zone_reclaim(struct dm_zone *zone); int dmz_lock_zone_reclaim(struct dm_zone *zone);
void dmz_unlock_zone_reclaim(struct dm_zone *zone); void dmz_unlock_zone_reclaim(struct dm_zone *zone);
struct dm_zone *dmz_get_zone_for_reclaim(struct dmz_metadata *zmd, bool idle); struct dm_zone *dmz_get_zone_for_reclaim(struct dmz_metadata *zmd, bool idle);
...@@ -272,7 +256,7 @@ int dmz_merge_valid_blocks(struct dmz_metadata *zmd, struct dm_zone *from_zone, ...@@ -272,7 +256,7 @@ int dmz_merge_valid_blocks(struct dmz_metadata *zmd, struct dm_zone *from_zone,
/* /*
* Functions defined in dm-zoned-reclaim.c * Functions defined in dm-zoned-reclaim.c
*/ */
int dmz_ctr_reclaim(struct dmz_metadata *zmd, struct dmz_reclaim **zrc); int dmz_ctr_reclaim(struct dmz_metadata *zmd, struct dmz_reclaim **zrc, int idx);
void dmz_dtr_reclaim(struct dmz_reclaim *zrc); void dmz_dtr_reclaim(struct dmz_reclaim *zrc);
void dmz_suspend_reclaim(struct dmz_reclaim *zrc); void dmz_suspend_reclaim(struct dmz_reclaim *zrc);
void dmz_resume_reclaim(struct dmz_reclaim *zrc); void dmz_resume_reclaim(struct dmz_reclaim *zrc);
...@@ -285,4 +269,22 @@ void dmz_schedule_reclaim(struct dmz_reclaim *zrc); ...@@ -285,4 +269,22 @@ void dmz_schedule_reclaim(struct dmz_reclaim *zrc);
bool dmz_bdev_is_dying(struct dmz_dev *dmz_dev); bool dmz_bdev_is_dying(struct dmz_dev *dmz_dev);
bool dmz_check_bdev(struct dmz_dev *dmz_dev); bool dmz_check_bdev(struct dmz_dev *dmz_dev);
/*
* Deactivate a zone. This decrement the zone reference counter
* indicating that all BIOs to the zone have completed when the count is 0.
*/
static inline void dmz_deactivate_zone(struct dm_zone *zone)
{
dmz_reclaim_bio_acc(zone->dev->reclaim);
atomic_dec(&zone->refcount);
}
/*
* Test if a zone is active, that is, has a refcount > 0.
*/
static inline bool dmz_is_active(struct dm_zone *zone)
{
return atomic_read(&zone->refcount);
}
#endif /* DM_ZONED_H */ #endif /* DM_ZONED_H */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment