Commit 05229bee authored by Jens Axboe

block: add block polling support

Add basic support for polling for specific IO to complete. This uses
the cookie that blk-mq passes back, which enables the block layer
to pass this cookie to the driver to spin for a specific request.

This will be combined with request latency tracking, so we can make
qualified decisions about when to poll and when not to. For now, for
benchmark purposes, we add a sysfs file that controls whether polling
is enabled or not.
Signed-off-by: Jens Axboe <axboe@fb.com>
Acked-by: Christoph Hellwig <hch@lst.de>
Acked-by: Keith Busch <keith.busch@intel.com>
parent 7b371636
...@@ -3312,6 +3312,47 @@ void blk_finish_plug(struct blk_plug *plug) ...@@ -3312,6 +3312,47 @@ void blk_finish_plug(struct blk_plug *plug)
} }
EXPORT_SYMBOL(blk_finish_plug); EXPORT_SYMBOL(blk_finish_plug);
/*
 * blk_poll - spin waiting for a specific IO to complete
 * @q:      request queue the IO was issued on
 * @cookie: cookie identifying the IO; it encodes both the hardware queue
 *          number and the tag handed to the driver's ->poll() hook
 *
 * Returns false without polling when the queue has no blk-mq ->poll() hook,
 * the cookie is not valid, or QUEUE_FLAG_POLL is not set on the queue.
 *
 * Otherwise any plugged IO of the current task is flushed and ->poll() is
 * called repeatedly until one of the following happens:
 *   - ->poll() returns > 0: task state is set to TASK_RUNNING, returns true
 *   - the task state is (or, due to a pending signal, has been set to)
 *     TASK_RUNNING: returns true
 *   - ->poll() returns < 0: returns false
 *   - need_resched() becomes true: returns false
 */
bool blk_poll(struct request_queue *q, blk_qc_t cookie)
{
	struct blk_plug *pending;
	long saved_state;

	/* Preconditions, checked in the same order as a single || chain. */
	if (!q->mq_ops)
		return false;
	if (!q->mq_ops->poll)
		return false;
	if (!blk_qc_t_valid(cookie))
		return false;
	if (!test_bit(QUEUE_FLAG_POLL, &q->queue_flags))
		return false;

	/* Flush whatever the current task still holds in its plug. */
	pending = current->plug;
	if (pending)
		blk_flush_plug_list(pending, false);

	/* Task state as it was on entry; used for the signal check below. */
	saved_state = current->state;

	for (;;) {
		unsigned int hwq;
		struct blk_mq_hw_ctx *hctx;
		int rc;

		/* Give up the CPU as soon as a reschedule is requested. */
		if (need_resched())
			break;

		hwq = blk_qc_t_to_queue_num(cookie);
		hctx = q->queue_hw_ctx[hwq];

		hctx->poll_invoked++;
		rc = q->mq_ops->poll(hctx, blk_qc_t_to_tag(cookie));
		if (rc > 0) {
			hctx->poll_success++;
			set_current_state(TASK_RUNNING);
			return true;
		}

		/* A pending signal (per the entry state) ends the wait. */
		if (signal_pending_state(saved_state, current))
			set_current_state(TASK_RUNNING);

		if (current->state == TASK_RUNNING)
			return true;

		/* Negative return from the driver: stop polling. */
		if (rc < 0)
			break;

		cpu_relax();
	}

	return false;
}
#ifdef CONFIG_PM #ifdef CONFIG_PM
/** /**
* blk_pm_runtime_init - Block layer runtime PM initialization routine * blk_pm_runtime_init - Block layer runtime PM initialization routine
......
...@@ -174,6 +174,11 @@ static ssize_t blk_mq_sysfs_rq_list_show(struct blk_mq_ctx *ctx, char *page) ...@@ -174,6 +174,11 @@ static ssize_t blk_mq_sysfs_rq_list_show(struct blk_mq_ctx *ctx, char *page)
return ret; return ret;
} }
/*
 * sysfs show handler for the per-hardware-context "io_poll" attribute.
 *
 * Formats the context's poll_invoked and poll_success counters into @page
 * as "invoked=<n>, success=<n>\n" and returns sprintf()'s result.
 */
static ssize_t blk_mq_hw_sysfs_poll_show(struct blk_mq_hw_ctx *hctx, char *page)
{
	const unsigned long invoked = hctx->poll_invoked;
	const unsigned long success = hctx->poll_success;

	return sprintf(page, "invoked=%lu, success=%lu\n", invoked, success);
}
static ssize_t blk_mq_hw_sysfs_queued_show(struct blk_mq_hw_ctx *hctx, static ssize_t blk_mq_hw_sysfs_queued_show(struct blk_mq_hw_ctx *hctx,
char *page) char *page)
{ {
...@@ -295,6 +300,10 @@ static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_cpus = { ...@@ -295,6 +300,10 @@ static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_cpus = {
.attr = {.name = "cpu_list", .mode = S_IRUGO }, .attr = {.name = "cpu_list", .mode = S_IRUGO },
.show = blk_mq_hw_sysfs_cpus_show, .show = blk_mq_hw_sysfs_cpus_show,
}; };
/* Read-only per-hardware-context "io_poll" attribute (poll counters). */
static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_poll = {
	.attr = {
		.name = "io_poll",
		.mode = S_IRUGO,
	},
	.show = blk_mq_hw_sysfs_poll_show,
};
static struct attribute *default_hw_ctx_attrs[] = { static struct attribute *default_hw_ctx_attrs[] = {
&blk_mq_hw_sysfs_queued.attr, &blk_mq_hw_sysfs_queued.attr,
...@@ -304,6 +313,7 @@ static struct attribute *default_hw_ctx_attrs[] = { ...@@ -304,6 +313,7 @@ static struct attribute *default_hw_ctx_attrs[] = {
&blk_mq_hw_sysfs_tags.attr, &blk_mq_hw_sysfs_tags.attr,
&blk_mq_hw_sysfs_cpus.attr, &blk_mq_hw_sysfs_cpus.attr,
&blk_mq_hw_sysfs_active.attr, &blk_mq_hw_sysfs_active.attr,
&blk_mq_hw_sysfs_poll.attr,
NULL, NULL,
}; };
......
...@@ -317,6 +317,34 @@ queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count) ...@@ -317,6 +317,34 @@ queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count)
return ret; return ret;
} }
/*
 * sysfs show handler for the queue "io_poll" attribute: reports whether
 * QUEUE_FLAG_POLL is currently set on @q, formatted by queue_var_show().
 */
static ssize_t queue_poll_show(struct request_queue *q, char *page)
{
	int polling = test_bit(QUEUE_FLAG_POLL, &q->queue_flags);

	return queue_var_show(polling, page);
}
/*
 * sysfs store handler for the queue "io_poll" attribute.
 *
 * Rejects the write with -EINVAL when the queue has no blk-mq ->poll() hook.
 * Otherwise the value obtained through queue_var_store() decides whether
 * QUEUE_FLAG_POLL is set (non-zero) or cleared (zero); the flag is changed
 * under q->queue_lock with interrupts disabled.
 *
 * Returns queue_var_store()'s result (a negative value is passed back
 * unchanged and leaves the flag untouched).
 */
static ssize_t queue_poll_store(struct request_queue *q, const char *page,
				size_t count)
{
	unsigned long enable;
	ssize_t consumed;

	/* Polling can only be toggled if the driver is able to poll. */
	if (!(q->mq_ops && q->mq_ops->poll))
		return -EINVAL;

	consumed = queue_var_store(&enable, page, count);
	if (consumed < 0)
		return consumed;

	spin_lock_irq(q->queue_lock);
	if (!enable)
		queue_flag_clear(QUEUE_FLAG_POLL, q);
	else
		queue_flag_set(QUEUE_FLAG_POLL, q);
	spin_unlock_irq(q->queue_lock);

	return consumed;
}
static struct queue_sysfs_entry queue_requests_entry = { static struct queue_sysfs_entry queue_requests_entry = {
.attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR }, .attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR },
.show = queue_requests_show, .show = queue_requests_show,
...@@ -442,6 +470,12 @@ static struct queue_sysfs_entry queue_random_entry = { ...@@ -442,6 +470,12 @@ static struct queue_sysfs_entry queue_random_entry = {
.store = queue_store_random, .store = queue_store_random,
}; };
/* Queue "io_poll" attribute: world-readable, writable by the owner. */
static struct queue_sysfs_entry queue_poll_entry = {
	.attr = {
		.name = "io_poll",
		.mode = S_IRUGO | S_IWUSR,
	},
	.show = queue_poll_show,
	.store = queue_poll_store,
};
static struct attribute *default_attrs[] = { static struct attribute *default_attrs[] = {
&queue_requests_entry.attr, &queue_requests_entry.attr,
&queue_ra_entry.attr, &queue_ra_entry.attr,
...@@ -466,6 +500,7 @@ static struct attribute *default_attrs[] = { ...@@ -466,6 +500,7 @@ static struct attribute *default_attrs[] = {
&queue_rq_affinity_entry.attr, &queue_rq_affinity_entry.attr,
&queue_iostats_entry.attr, &queue_iostats_entry.attr,
&queue_random_entry.attr, &queue_random_entry.attr,
&queue_poll_entry.attr,
NULL, NULL,
}; };
......
...@@ -59,6 +59,9 @@ struct blk_mq_hw_ctx { ...@@ -59,6 +59,9 @@ struct blk_mq_hw_ctx {
struct blk_mq_cpu_notifier cpu_notifier; struct blk_mq_cpu_notifier cpu_notifier;
struct kobject kobj; struct kobject kobj;
/*
 * Polling statistics, updated by blk_poll(): poll_invoked counts every
 * call into the driver's ->poll() hook, poll_success counts the calls
 * that returned > 0.
 */
unsigned long poll_invoked;
unsigned long poll_success;
}; };
struct blk_mq_tag_set { struct blk_mq_tag_set {
...@@ -97,6 +100,8 @@ typedef void (exit_request_fn)(void *, struct request *, unsigned int, ...@@ -97,6 +100,8 @@ typedef void (exit_request_fn)(void *, struct request *, unsigned int,
typedef void (busy_iter_fn)(struct blk_mq_hw_ctx *, struct request *, void *, typedef void (busy_iter_fn)(struct blk_mq_hw_ctx *, struct request *, void *,
bool); bool);
typedef void (busy_tag_iter_fn)(struct request *, void *, bool); typedef void (busy_tag_iter_fn)(struct request *, void *, bool);
/*
 * Driver hook to poll a hardware context for the given tag. As used by
 * blk_poll(): a return > 0 counts as a successful poll, < 0 makes the
 * caller stop polling, and 0 lets it keep spinning.
 */
typedef int (poll_fn)(struct blk_mq_hw_ctx *, unsigned int);
struct blk_mq_ops { struct blk_mq_ops {
/* /*
...@@ -114,6 +119,11 @@ struct blk_mq_ops { ...@@ -114,6 +119,11 @@ struct blk_mq_ops {
*/ */
timeout_fn *timeout; timeout_fn *timeout;
/*
* Called to poll for completion of a specific tag.
*/
poll_fn *poll;
softirq_done_fn *complete; softirq_done_fn *complete;
/* /*
......
...@@ -487,6 +487,7 @@ struct request_queue { ...@@ -487,6 +487,7 @@ struct request_queue {
#define QUEUE_FLAG_DEAD 19 /* queue tear-down finished */ #define QUEUE_FLAG_DEAD 19 /* queue tear-down finished */
#define QUEUE_FLAG_INIT_DONE 20 /* queue is initialized */ #define QUEUE_FLAG_INIT_DONE 20 /* queue is initialized */
#define QUEUE_FLAG_NO_SG_MERGE 21 /* don't attempt to merge SG segments*/ #define QUEUE_FLAG_NO_SG_MERGE 21 /* don't attempt to merge SG segments*/
#define QUEUE_FLAG_POLL 22 /* IO polling enabled if set */
#define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \
(1 << QUEUE_FLAG_STACKABLE) | \ (1 << QUEUE_FLAG_STACKABLE) | \
...@@ -814,6 +815,8 @@ extern int blk_execute_rq(struct request_queue *, struct gendisk *, ...@@ -814,6 +815,8 @@ extern int blk_execute_rq(struct request_queue *, struct gendisk *,
extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *, extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *,
struct request *, int, rq_end_io_fn *); struct request *, int, rq_end_io_fn *);
/* Poll queue @q for completion of the IO identified by @cookie. */
bool blk_poll(struct request_queue *q, blk_qc_t cookie);
static inline struct request_queue *bdev_get_queue(struct block_device *bdev) static inline struct request_queue *bdev_get_queue(struct block_device *bdev)
{ {
return bdev->bd_disk->queue; /* this is never NULL */ return bdev->bd_disk->queue; /* this is never NULL */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment