Commit 907c3eb1 authored by Bob Liu, committed by David Vrabel

xen-blkfront: convert to blk-mq APIs

Note: This patch is based on the original work of Arianna's internship for
GNOME's Outreach Program for Women.

Only one hardware queue is used for now, so there is no significant
performance change.

The legacy non-mq code is deleted completely, matching other drivers
such as virtio, mtip, and nvme.

Also dropped an unnecessary acquisition of info->io_lock around the call to
blk_mq_stop_hw_queues().
Signed-off-by: Arianna Avanzini <avanzini.arianna@gmail.com>
Signed-off-by: Bob Liu <bob.liu@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Acked-by: Jens Axboe <axboe@fb.com>
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
parent a7da51ae
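
For readers unfamiliar with the API being adopted, the conversion follows the standard blk-mq registration pattern of this kernel generation: the driver describes its hardware queue(s) in a struct blk_mq_tag_set, points it at a struct blk_mq_ops whose .queue_rq callback receives one request at a time, and creates the request queue with blk_mq_init_queue() instead of blk_init_queue(). Below is a minimal sketch of that pattern, not part of the patch; the mydrv_* names are hypothetical, and the patch's real equivalents (blkfront_mq_ops, blkif_queue_rq, and the tag_set embedded in struct blkfront_info) appear in the diff that follows. The .queue_rq body is sketched separately after the blkif_queue_rq hunk.

/* Minimal blk-mq registration sketch (circa Linux 4.2 APIs).
 * "mydrv" is a hypothetical driver name, not part of this patch. */
#include <linux/blk-mq.h>
#include <linux/blkdev.h>
#include <linux/err.h>
#include <linux/numa.h>
#include <linux/string.h>

/* Per-request dispatch callback; compare blkif_queue_rq() in the diff. */
static int mydrv_queue_rq(struct blk_mq_hw_ctx *hctx,
                          const struct blk_mq_queue_data *qd);

static struct blk_mq_ops mydrv_mq_ops = {
        .queue_rq  = mydrv_queue_rq,
        .map_queue = blk_mq_map_queue,  /* default CPU-to-queue mapping */
};

static struct request_queue *mydrv_init_queue(struct blk_mq_tag_set *set,
                                              void *driver_data)
{
        struct request_queue *q;
        int err;

        memset(set, 0, sizeof(*set));
        set->ops = &mydrv_mq_ops;
        set->nr_hw_queues = 1;          /* single hw queue, as in this patch */
        set->queue_depth = 64;          /* blkfront uses BLK_RING_SIZE(info) */
        set->numa_node = NUMA_NO_NODE;
        set->flags = BLK_MQ_F_SHOULD_MERGE;
        set->driver_data = driver_data;

        err = blk_mq_alloc_tag_set(set);        /* allocates tags/requests */
        if (err)
                return ERR_PTR(err);

        q = blk_mq_init_queue(set);             /* replaces blk_init_queue() */
        if (IS_ERR(q))
                blk_mq_free_tag_set(set);
        return q;
}

Teardown mirrors this: blk_cleanup_queue() on the request queue followed by blk_mq_free_tag_set(), which is exactly what xlvbd_release_gendisk() does after this patch.
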
@@ -37,6 +37,7 @@
 #include <linux/interrupt.h>
 #include <linux/blkdev.h>
+#include <linux/blk-mq.h>
 #include <linux/hdreg.h>
 #include <linux/cdrom.h>
 #include <linux/module.h>
@@ -148,6 +149,7 @@ struct blkfront_info
 	unsigned int feature_persistent:1;
 	unsigned int max_indirect_segments;
 	int is_ready;
+	struct blk_mq_tag_set tag_set;
 };

 static unsigned int nr_minors;
@@ -617,54 +619,41 @@ static inline bool blkif_request_flush_invalid(struct request *req,
 		 !(info->feature_flush & REQ_FUA)));
 }

-/*
- * do_blkif_request
- *  read a block; request is in a request queue
- */
-static void do_blkif_request(struct request_queue *rq)
+static int blkif_queue_rq(struct blk_mq_hw_ctx *hctx,
+			   const struct blk_mq_queue_data *qd)
 {
-	struct blkfront_info *info = NULL;
-	struct request *req;
-	int queued;
-
-	pr_debug("Entered do_blkif_request\n");
-
-	queued = 0;
-
-	while ((req = blk_peek_request(rq)) != NULL) {
-		info = req->rq_disk->private_data;
-
-		if (RING_FULL(&info->ring))
-			goto wait;
+	struct blkfront_info *info = qd->rq->rq_disk->private_data;

-		blk_start_request(req);
+	blk_mq_start_request(qd->rq);
+	spin_lock_irq(&info->io_lock);
+	if (RING_FULL(&info->ring))
+		goto out_busy;

-		if (blkif_request_flush_invalid(req, info)) {
-			__blk_end_request_all(req, -EOPNOTSUPP);
-			continue;
-		}
+	if (blkif_request_flush_invalid(qd->rq, info))
+		goto out_err;

-		pr_debug("do_blk_req %p: cmd %p, sec %lx, "
-			 "(%u/%u) [%s]\n",
-			 req, req->cmd, (unsigned long)blk_rq_pos(req),
-			 blk_rq_cur_sectors(req), blk_rq_sectors(req),
-			 rq_data_dir(req) ? "write" : "read");
+	if (blkif_queue_request(qd->rq))
+		goto out_busy;

-		if (blkif_queue_request(req)) {
-			blk_requeue_request(rq, req);
-wait:
-			/* Avoid pointless unplugs. */
-			blk_stop_queue(rq);
-			break;
-		}
+	flush_requests(info);
+	spin_unlock_irq(&info->io_lock);
+	return BLK_MQ_RQ_QUEUE_OK;

-		queued++;
-	}
+out_err:
+	spin_unlock_irq(&info->io_lock);
+	return BLK_MQ_RQ_QUEUE_ERROR;

-	if (queued != 0)
-		flush_requests(info);
+out_busy:
+	spin_unlock_irq(&info->io_lock);
+	blk_mq_stop_hw_queue(hctx);
+	return BLK_MQ_RQ_QUEUE_BUSY;
 }

+static struct blk_mq_ops blkfront_mq_ops = {
+	.queue_rq = blkif_queue_rq,
+	.map_queue = blk_mq_map_queue,
+};
+
 static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size,
 				unsigned int physical_sector_size,
 				unsigned int segments)
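
A side note on the return values used in blkif_queue_rq() above (not part of the patch text itself): in this era of the blk-mq API, .queue_rq returns BLK_MQ_RQ_QUEUE_OK when the request has been dispatched, BLK_MQ_RQ_QUEUE_ERROR to fail it, and BLK_MQ_RQ_QUEUE_BUSY to ask the core to retry later. A driver returning BUSY normally also stops the hardware queue so the core does not spin, and restarts it from its completion path; blkif_queue_rq() does this when the shared ring is full, and kick_pending_request_queues() (further down in the diff) does the restart via blk_mq_start_stopped_hw_queues(). A condensed sketch of that contract, with mydrv_ring_full() and mydrv_submit() as hypothetical helpers:

/* Sketch of the .queue_rq busy-handling contract (circa Linux 4.2).
 * All mydrv_* names are hypothetical, not part of this patch. */
#include <linux/blk-mq.h>
#include <linux/blkdev.h>

struct mydrv_info {
        struct request_queue *rq;       /* created as in the earlier sketch */
        /* ... device/ring state ... */
};

static bool mydrv_ring_full(struct mydrv_info *info);                  /* hypothetical */
static int mydrv_submit(struct mydrv_info *info, struct request *rq);  /* hypothetical */

static int mydrv_queue_rq(struct blk_mq_hw_ctx *hctx,
                          const struct blk_mq_queue_data *qd)
{
        struct mydrv_info *info = qd->rq->rq_disk->private_data;

        blk_mq_start_request(qd->rq);

        if (mydrv_ring_full(info)) {
                /* Park this hw queue; the core keeps the request and
                 * re-issues it once the queue is started again. */
                blk_mq_stop_hw_queue(hctx);
                return BLK_MQ_RQ_QUEUE_BUSY;
        }

        if (mydrv_submit(info, qd->rq))
                return BLK_MQ_RQ_QUEUE_ERROR;   /* fail the request */

        return BLK_MQ_RQ_QUEUE_OK;
}

/* Completion side: once ring space is available again, restart the
 * stopped queue(s) so parked requests are re-dispatched. */
static void mydrv_restart(struct mydrv_info *info)
{
        blk_mq_start_stopped_hw_queues(info->rq, true);
}
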
@@ -672,9 +661,22 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size,
 	struct request_queue *rq;
 	struct blkfront_info *info = gd->private_data;

-	rq = blk_init_queue(do_blkif_request, &info->io_lock);
-	if (rq == NULL)
+	memset(&info->tag_set, 0, sizeof(info->tag_set));
+	info->tag_set.ops = &blkfront_mq_ops;
+	info->tag_set.nr_hw_queues = 1;
+	info->tag_set.queue_depth = BLK_RING_SIZE(info);
+	info->tag_set.numa_node = NUMA_NO_NODE;
+	info->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE;
+	info->tag_set.cmd_size = 0;
+	info->tag_set.driver_data = info;
+
+	if (blk_mq_alloc_tag_set(&info->tag_set))
+		return -1;
+	rq = blk_mq_init_queue(&info->tag_set);
+	if (IS_ERR(rq)) {
+		blk_mq_free_tag_set(&info->tag_set);
 		return -1;
+	}

 	queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq);
@@ -902,19 +904,15 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
 static void xlvbd_release_gendisk(struct blkfront_info *info)
 {
 	unsigned int minor, nr_minors;
-	unsigned long flags;

 	if (info->rq == NULL)
 		return;

-	spin_lock_irqsave(&info->io_lock, flags);
-
 	/* No more blkif_request(). */
-	blk_stop_queue(info->rq);
+	blk_mq_stop_hw_queues(info->rq);

 	/* No more gnttab callback work. */
 	gnttab_cancel_free_callback(&info->callback);
-	spin_unlock_irqrestore(&info->io_lock, flags);

 	/* Flush gnttab callback work. Must be done with no locks held. */
 	flush_work(&info->work);
@@ -926,20 +924,18 @@ static void xlvbd_release_gendisk(struct blkfront_info *info)
 	xlbd_release_minors(minor, nr_minors);

 	blk_cleanup_queue(info->rq);
+	blk_mq_free_tag_set(&info->tag_set);
 	info->rq = NULL;

 	put_disk(info->gd);
 	info->gd = NULL;
 }

-/* Must be called with io_lock holded */
 static void kick_pending_request_queues(struct blkfront_info *info)
 {
-	if (!RING_FULL(&info->ring)) {
-		/* Re-enable calldowns. */
-		blk_start_queue(info->rq);
-		/* Kick things off immediately. */
-		do_blkif_request(info->rq);
-	}
+	if (!RING_FULL(&info->ring))
+		blk_mq_start_stopped_hw_queues(info->rq, true);
 }

 static void blkif_restart_queue(struct work_struct *work)
@@ -964,7 +960,7 @@ static void blkif_free(struct blkfront_info *info, int suspend)
 		BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED;
 	/* No more blkif_request(). */
 	if (info->rq)
-		blk_stop_queue(info->rq);
+		blk_mq_stop_hw_queues(info->rq);

 	/* Remove all persistent grants */
 	if (!list_empty(&info->grants)) {
@@ -1147,7 +1143,6 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
 	RING_IDX i, rp;
 	unsigned long flags;
 	struct blkfront_info *info = (struct blkfront_info *)dev_id;
-	int error;

 	spin_lock_irqsave(&info->io_lock, flags);
@@ -1188,37 +1183,37 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
 			continue;
 		}

-		error = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO;
+		req->errors = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO;
 		switch (bret->operation) {
 		case BLKIF_OP_DISCARD:
 			if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
 				struct request_queue *rq = info->rq;
 				printk(KERN_WARNING "blkfront: %s: %s op failed\n",
 					   info->gd->disk_name, op_name(bret->operation));
-				error = -EOPNOTSUPP;
+				req->errors = -EOPNOTSUPP;
 				info->feature_discard = 0;
 				info->feature_secdiscard = 0;
 				queue_flag_clear(QUEUE_FLAG_DISCARD, rq);
 				queue_flag_clear(QUEUE_FLAG_SECDISCARD, rq);
 			}
-			__blk_end_request_all(req, error);
+			blk_mq_complete_request(req);
 			break;
 		case BLKIF_OP_FLUSH_DISKCACHE:
 		case BLKIF_OP_WRITE_BARRIER:
 			if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
 				printk(KERN_WARNING "blkfront: %s: %s op failed\n",
 				       info->gd->disk_name, op_name(bret->operation));
-				error = -EOPNOTSUPP;
+				req->errors = -EOPNOTSUPP;
 			}
 			if (unlikely(bret->status == BLKIF_RSP_ERROR &&
 				     info->shadow[id].req.u.rw.nr_segments == 0)) {
 				printk(KERN_WARNING "blkfront: %s: empty %s op failed\n",
 				       info->gd->disk_name, op_name(bret->operation));
-				error = -EOPNOTSUPP;
+				req->errors = -EOPNOTSUPP;
 			}
-			if (unlikely(error)) {
-				if (error == -EOPNOTSUPP)
-					error = 0;
+			if (unlikely(req->errors)) {
+				if (req->errors == -EOPNOTSUPP)
+					req->errors = 0;
 				info->feature_flush = 0;
 				xlvbd_flush(info);
 			}
@@ -1229,7 +1224,7 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
 			dev_dbg(&info->xbdev->dev, "Bad return from blkdev data "
 				"request: %x\n", bret->status);

-			__blk_end_request_all(req, error);
+			blk_mq_complete_request(req);
 			break;
 		default:
 			BUG();
@@ -1558,28 +1553,6 @@ static int blkif_recover(struct blkfront_info *info)
 	kfree(copy);

-	/*
-	 * Empty the queue, this is important because we might have
-	 * requests in the queue with more segments than what we
-	 * can handle now.
-	 */
-	spin_lock_irq(&info->io_lock);
-	while ((req = blk_fetch_request(info->rq)) != NULL) {
-		if (req->cmd_flags &
-		    (REQ_FLUSH | REQ_FUA | REQ_DISCARD | REQ_SECURE)) {
-			list_add(&req->queuelist, &requests);
-			continue;
-		}
-		merge_bio.head = req->bio;
-		merge_bio.tail = req->biotail;
-		bio_list_merge(&bio_list, &merge_bio);
-		req->bio = NULL;
-		if (req->cmd_flags & (REQ_FLUSH | REQ_FUA))
-			pr_alert("diskcache flush request found!\n");
-		__blk_end_request_all(req, 0);
-	}
-	spin_unlock_irq(&info->io_lock);
-
 	xenbus_switch_state(info->xbdev, XenbusStateConnected);

 	spin_lock_irq(&info->io_lock);
@@ -1594,9 +1567,10 @@ static int blkif_recover(struct blkfront_info *info)
 		/* Requeue pending requests (flush or discard) */
 		list_del_init(&req->queuelist);
 		BUG_ON(req->nr_phys_segments > segs);
-		blk_requeue_request(info->rq, req);
+		blk_mq_requeue_request(req);
 	}
 	spin_unlock_irq(&info->io_lock);
+	blk_mq_kick_requeue_list(info->rq);

 	while ((bio = bio_list_pop(&bio_list)) != NULL) {
 		/* Traverse the list of pending bios and re-queue them */
......