Commit 54bdd67d, authored by Keith Busch, committed by Jens Axboe

blk-mq: remove hybrid polling

io_uring provides the only way user space can poll completions, and that
always sets BLK_POLL_NOSLEEP. This effectively makes hybrid polling dead
code, so remove it and everything supporting it.

Hybrid polling was effectively killed off with 9650b453, "block:
ignore RWF_HIPRI hint for sync dio", but was still potentially reachable
through io_uring until d729cf9a, "io_uring: don't sleep when
polling for I/O". Hybrid polling probably should not have been
reachable through that async interface in the first place.

Fixes: 9650b453 ("block: ignore RWF_HIPRI hint for sync dio")
Fixes: d729cf9a ("io_uring: don't sleep when polling for I/O")
Signed-off-by: Keith Busch <kbusch@kernel.org>
Link: https://lore.kernel.org/r/20230320194926.3353144-1-kbusch@meta.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent 4cf2c3ab
...@@ -336,18 +336,11 @@ What: /sys/block/<disk>/queue/io_poll_delay ...@@ -336,18 +336,11 @@ What: /sys/block/<disk>/queue/io_poll_delay
Date: November 2016 Date: November 2016
Contact: linux-block@vger.kernel.org Contact: linux-block@vger.kernel.org
Description: Description:
[RW] If polling is enabled, this controls what kind of polling [RW] This was used to control what kind of polling will be
will be performed. It defaults to -1, which is classic polling. performed. It is now fixed to -1, which is classic polling.
In this mode, the CPU will repeatedly ask for completions In this mode, the CPU will repeatedly ask for completions
without giving up any time. If set to 0, a hybrid polling mode without giving up any time.
is used, where the kernel will attempt to make an educated guess <deprecated>
at when the IO will complete. Based on this guess, the kernel
will put the process issuing IO to sleep for an amount of time,
before entering a classic poll loop. This mode might be a little
slower than pure classic polling, but it will be more efficient.
If set to a value larger than 0, the kernel will put the process
issuing IO to sleep for this amount of microseconds before
entering classic polling.
What: /sys/block/<disk>/queue/io_timeout What: /sys/block/<disk>/queue/io_timeout
......
...@@ -263,13 +263,7 @@ static void blk_free_queue_rcu(struct rcu_head *rcu_head) ...@@ -263,13 +263,7 @@ static void blk_free_queue_rcu(struct rcu_head *rcu_head)
static void blk_free_queue(struct request_queue *q) static void blk_free_queue(struct request_queue *q)
{ {
if (q->poll_stat)
blk_stat_remove_callback(q, q->poll_cb);
blk_stat_free_callback(q->poll_cb);
blk_free_queue_stats(q->stats); blk_free_queue_stats(q->stats);
kfree(q->poll_stat);
if (queue_is_mq(q)) if (queue_is_mq(q))
blk_mq_release(q); blk_mq_release(q);
......
...@@ -15,33 +15,8 @@ ...@@ -15,33 +15,8 @@
#include "blk-mq-tag.h" #include "blk-mq-tag.h"
#include "blk-rq-qos.h" #include "blk-rq-qos.h"
static void print_stat(struct seq_file *m, struct blk_rq_stat *stat)
{
if (stat->nr_samples) {
seq_printf(m, "samples=%d, mean=%llu, min=%llu, max=%llu",
stat->nr_samples, stat->mean, stat->min, stat->max);
} else {
seq_puts(m, "samples=0");
}
}
static int queue_poll_stat_show(void *data, struct seq_file *m) static int queue_poll_stat_show(void *data, struct seq_file *m)
{ {
struct request_queue *q = data;
int bucket;
if (!q->poll_stat)
return 0;
for (bucket = 0; bucket < (BLK_MQ_POLL_STATS_BKTS / 2); bucket++) {
seq_printf(m, "read (%d Bytes): ", 1 << (9 + bucket));
print_stat(m, &q->poll_stat[2 * bucket]);
seq_puts(m, "\n");
seq_printf(m, "write (%d Bytes): ", 1 << (9 + bucket));
print_stat(m, &q->poll_stat[2 * bucket + 1]);
seq_puts(m, "\n");
}
return 0; return 0;
} }
...@@ -282,7 +257,6 @@ static const char *const rqf_name[] = { ...@@ -282,7 +257,6 @@ static const char *const rqf_name[] = {
RQF_NAME(STATS), RQF_NAME(STATS),
RQF_NAME(SPECIAL_PAYLOAD), RQF_NAME(SPECIAL_PAYLOAD),
RQF_NAME(ZONE_WRITE_LOCKED), RQF_NAME(ZONE_WRITE_LOCKED),
RQF_NAME(MQ_POLL_SLEPT),
RQF_NAME(TIMED_OUT), RQF_NAME(TIMED_OUT),
RQF_NAME(ELV), RQF_NAME(ELV),
RQF_NAME(RESV), RQF_NAME(RESV),
......
...@@ -46,51 +46,15 @@ ...@@ -46,51 +46,15 @@
static DEFINE_PER_CPU(struct llist_head, blk_cpu_done); static DEFINE_PER_CPU(struct llist_head, blk_cpu_done);
static void blk_mq_poll_stats_start(struct request_queue *q);
static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb);
static int blk_mq_poll_stats_bkt(const struct request *rq)
{
int ddir, sectors, bucket;
ddir = rq_data_dir(rq);
sectors = blk_rq_stats_sectors(rq);
bucket = ddir + 2 * ilog2(sectors);
if (bucket < 0)
return -1;
else if (bucket >= BLK_MQ_POLL_STATS_BKTS)
return ddir + BLK_MQ_POLL_STATS_BKTS - 2;
return bucket;
}
#define BLK_QC_T_SHIFT 16
#define BLK_QC_T_INTERNAL (1U << 31)
static inline struct blk_mq_hw_ctx *blk_qc_to_hctx(struct request_queue *q, static inline struct blk_mq_hw_ctx *blk_qc_to_hctx(struct request_queue *q,
blk_qc_t qc) blk_qc_t qc)
{ {
return xa_load(&q->hctx_table, return xa_load(&q->hctx_table, qc);
(qc & ~BLK_QC_T_INTERNAL) >> BLK_QC_T_SHIFT);
}
static inline struct request *blk_qc_to_rq(struct blk_mq_hw_ctx *hctx,
blk_qc_t qc)
{
unsigned int tag = qc & ((1U << BLK_QC_T_SHIFT) - 1);
if (qc & BLK_QC_T_INTERNAL)
return blk_mq_tag_to_rq(hctx->sched_tags, tag);
return blk_mq_tag_to_rq(hctx->tags, tag);
} }
static inline blk_qc_t blk_rq_to_qc(struct request *rq) static inline blk_qc_t blk_rq_to_qc(struct request *rq)
{ {
return (rq->mq_hctx->queue_num << BLK_QC_T_SHIFT) | return rq->mq_hctx->queue_num;
(rq->tag != -1 ?
rq->tag : (rq->internal_tag | BLK_QC_T_INTERNAL));
} }
/* /*
...@@ -1038,10 +1002,8 @@ static inline void blk_account_io_start(struct request *req) ...@@ -1038,10 +1002,8 @@ static inline void blk_account_io_start(struct request *req)
static inline void __blk_mq_end_request_acct(struct request *rq, u64 now) static inline void __blk_mq_end_request_acct(struct request *rq, u64 now)
{ {
if (rq->rq_flags & RQF_STATS) { if (rq->rq_flags & RQF_STATS)
blk_mq_poll_stats_start(rq->q);
blk_stat_add(rq, now); blk_stat_add(rq, now);
}
blk_mq_sched_completed_request(rq, now); blk_mq_sched_completed_request(rq, now);
blk_account_io_done(rq, now); blk_account_io_done(rq, now);
...@@ -4222,14 +4184,8 @@ int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, ...@@ -4222,14 +4184,8 @@ int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
/* mark the queue as mq asap */ /* mark the queue as mq asap */
q->mq_ops = set->ops; q->mq_ops = set->ops;
q->poll_cb = blk_stat_alloc_callback(blk_mq_poll_stats_fn,
blk_mq_poll_stats_bkt,
BLK_MQ_POLL_STATS_BKTS, q);
if (!q->poll_cb)
goto err_exit;
if (blk_mq_alloc_ctxs(q)) if (blk_mq_alloc_ctxs(q))
goto err_poll; goto err_exit;
/* init q->mq_kobj and sw queues' kobjects */ /* init q->mq_kobj and sw queues' kobjects */
blk_mq_sysfs_init(q); blk_mq_sysfs_init(q);
...@@ -4257,11 +4213,6 @@ int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, ...@@ -4257,11 +4213,6 @@ int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
q->nr_requests = set->queue_depth; q->nr_requests = set->queue_depth;
/*
* Default to classic polling
*/
q->poll_nsec = BLK_MQ_POLL_CLASSIC;
blk_mq_init_cpu_queues(q, set->nr_hw_queues); blk_mq_init_cpu_queues(q, set->nr_hw_queues);
blk_mq_add_queue_tag_set(set, q); blk_mq_add_queue_tag_set(set, q);
blk_mq_map_swqueue(q); blk_mq_map_swqueue(q);
...@@ -4269,9 +4220,6 @@ int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, ...@@ -4269,9 +4220,6 @@ int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
err_hctxs: err_hctxs:
blk_mq_release(q); blk_mq_release(q);
err_poll:
blk_stat_free_callback(q->poll_cb);
q->poll_cb = NULL;
err_exit: err_exit:
q->mq_ops = NULL; q->mq_ops = NULL;
return -ENOMEM; return -ENOMEM;
...@@ -4768,138 +4716,8 @@ void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues) ...@@ -4768,138 +4716,8 @@ void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues)
} }
EXPORT_SYMBOL_GPL(blk_mq_update_nr_hw_queues); EXPORT_SYMBOL_GPL(blk_mq_update_nr_hw_queues);
/* Enable polling stats and return whether they were already enabled. */ int blk_mq_poll(struct request_queue *q, blk_qc_t cookie, struct io_comp_batch *iob,
static bool blk_poll_stats_enable(struct request_queue *q) unsigned int flags)
{
if (q->poll_stat)
return true;
return blk_stats_alloc_enable(q);
}
static void blk_mq_poll_stats_start(struct request_queue *q)
{
/*
* We don't arm the callback if polling stats are not enabled or the
* callback is already active.
*/
if (!q->poll_stat || blk_stat_is_active(q->poll_cb))
return;
blk_stat_activate_msecs(q->poll_cb, 100);
}
static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb)
{
struct request_queue *q = cb->data;
int bucket;
for (bucket = 0; bucket < BLK_MQ_POLL_STATS_BKTS; bucket++) {
if (cb->stat[bucket].nr_samples)
q->poll_stat[bucket] = cb->stat[bucket];
}
}
static unsigned long blk_mq_poll_nsecs(struct request_queue *q,
struct request *rq)
{
unsigned long ret = 0;
int bucket;
/*
* If stats collection isn't on, don't sleep but turn it on for
* future users
*/
if (!blk_poll_stats_enable(q))
return 0;
/*
* As an optimistic guess, use half of the mean service time
* for this type of request. We can (and should) make this smarter.
* For instance, if the completion latencies are tight, we can
* get closer than just half the mean. This is especially
* important on devices where the completion latencies are longer
* than ~10 usec. We do use the stats for the relevant IO size
* if available which does lead to better estimates.
*/
bucket = blk_mq_poll_stats_bkt(rq);
if (bucket < 0)
return ret;
if (q->poll_stat[bucket].nr_samples)
ret = (q->poll_stat[bucket].mean + 1) / 2;
return ret;
}
static bool blk_mq_poll_hybrid(struct request_queue *q, blk_qc_t qc)
{
struct blk_mq_hw_ctx *hctx = blk_qc_to_hctx(q, qc);
struct request *rq = blk_qc_to_rq(hctx, qc);
struct hrtimer_sleeper hs;
enum hrtimer_mode mode;
unsigned int nsecs;
ktime_t kt;
/*
* If a request has completed on queue that uses an I/O scheduler, we
* won't get back a request from blk_qc_to_rq.
*/
if (!rq || (rq->rq_flags & RQF_MQ_POLL_SLEPT))
return false;
/*
* If we get here, hybrid polling is enabled. Hence poll_nsec can be:
*
* 0: use half of prev avg
* >0: use this specific value
*/
if (q->poll_nsec > 0)
nsecs = q->poll_nsec;
else
nsecs = blk_mq_poll_nsecs(q, rq);
if (!nsecs)
return false;
rq->rq_flags |= RQF_MQ_POLL_SLEPT;
/*
* This will be replaced with the stats tracking code, using
* 'avg_completion_time / 2' as the pre-sleep target.
*/
kt = nsecs;
mode = HRTIMER_MODE_REL;
hrtimer_init_sleeper_on_stack(&hs, CLOCK_MONOTONIC, mode);
hrtimer_set_expires(&hs.timer, kt);
do {
if (blk_mq_rq_state(rq) == MQ_RQ_COMPLETE)
break;
set_current_state(TASK_UNINTERRUPTIBLE);
hrtimer_sleeper_start_expires(&hs, mode);
if (hs.task)
io_schedule();
hrtimer_cancel(&hs.timer);
mode = HRTIMER_MODE_ABS;
} while (hs.task && !signal_pending(current));
__set_current_state(TASK_RUNNING);
destroy_hrtimer_on_stack(&hs.timer);
/*
* If we sleep, have the caller restart the poll loop to reset the
* state. Like for the other success return cases, the caller is
* responsible for checking if the IO completed. If the IO isn't
* complete, we'll get called again and will go straight to the busy
* poll loop.
*/
return true;
}
static int blk_mq_poll_classic(struct request_queue *q, blk_qc_t cookie,
struct io_comp_batch *iob, unsigned int flags)
{ {
struct blk_mq_hw_ctx *hctx = blk_qc_to_hctx(q, cookie); struct blk_mq_hw_ctx *hctx = blk_qc_to_hctx(q, cookie);
long state = get_current_state(); long state = get_current_state();
...@@ -4926,17 +4744,6 @@ static int blk_mq_poll_classic(struct request_queue *q, blk_qc_t cookie, ...@@ -4926,17 +4744,6 @@ static int blk_mq_poll_classic(struct request_queue *q, blk_qc_t cookie,
return 0; return 0;
} }
int blk_mq_poll(struct request_queue *q, blk_qc_t cookie, struct io_comp_batch *iob,
unsigned int flags)
{
if (!(flags & BLK_POLL_NOSLEEP) &&
q->poll_nsec != BLK_MQ_POLL_CLASSIC) {
if (blk_mq_poll_hybrid(q, cookie))
return 1;
}
return blk_mq_poll_classic(q, cookie, iob, flags);
}
unsigned int blk_mq_rq_cpu(struct request *rq) unsigned int blk_mq_rq_cpu(struct request *rq)
{ {
return rq->mq_ctx->cpu; return rq->mq_ctx->cpu;
......
...@@ -231,21 +231,3 @@ void blk_free_queue_stats(struct blk_queue_stats *stats) ...@@ -231,21 +231,3 @@ void blk_free_queue_stats(struct blk_queue_stats *stats)
kfree(stats); kfree(stats);
} }
bool blk_stats_alloc_enable(struct request_queue *q)
{
struct blk_rq_stat *poll_stat;
poll_stat = kcalloc(BLK_MQ_POLL_STATS_BKTS, sizeof(*poll_stat),
GFP_ATOMIC);
if (!poll_stat)
return false;
if (cmpxchg(&q->poll_stat, NULL, poll_stat) != NULL) {
kfree(poll_stat);
return true;
}
blk_stat_add_callback(q, q->poll_cb);
return false;
}
...@@ -408,35 +408,12 @@ queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count) ...@@ -408,35 +408,12 @@ queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count)
static ssize_t queue_poll_delay_show(struct request_queue *q, char *page) static ssize_t queue_poll_delay_show(struct request_queue *q, char *page)
{ {
int val; return sprintf(page, "%d\n", -1);
if (q->poll_nsec == BLK_MQ_POLL_CLASSIC)
val = BLK_MQ_POLL_CLASSIC;
else
val = q->poll_nsec / 1000;
return sprintf(page, "%d\n", val);
} }
static ssize_t queue_poll_delay_store(struct request_queue *q, const char *page, static ssize_t queue_poll_delay_store(struct request_queue *q, const char *page,
size_t count) size_t count)
{ {
int err, val;
if (!q->mq_ops || !q->mq_ops->poll)
return -EINVAL;
err = kstrtoint(page, 10, &val);
if (err < 0)
return err;
if (val == BLK_MQ_POLL_CLASSIC)
q->poll_nsec = BLK_MQ_POLL_CLASSIC;
else if (val >= 0)
q->poll_nsec = val * 1000;
else
return -EINVAL;
return count; return count;
} }
......
...@@ -57,8 +57,6 @@ typedef __u32 __bitwise req_flags_t; ...@@ -57,8 +57,6 @@ typedef __u32 __bitwise req_flags_t;
#define RQF_SPECIAL_PAYLOAD ((__force req_flags_t)(1 << 18)) #define RQF_SPECIAL_PAYLOAD ((__force req_flags_t)(1 << 18))
/* The per-zone write lock is held for this request */ /* The per-zone write lock is held for this request */
#define RQF_ZONE_WRITE_LOCKED ((__force req_flags_t)(1 << 19)) #define RQF_ZONE_WRITE_LOCKED ((__force req_flags_t)(1 << 19))
/* already slept for hybrid poll */
#define RQF_MQ_POLL_SLEPT ((__force req_flags_t)(1 << 20))
/* ->timeout has been called, don't expire again */ /* ->timeout has been called, don't expire again */
#define RQF_TIMED_OUT ((__force req_flags_t)(1 << 21)) #define RQF_TIMED_OUT ((__force req_flags_t)(1 << 21))
/* queue has elevator attached */ /* queue has elevator attached */
......
...@@ -44,12 +44,6 @@ extern const struct device_type disk_type; ...@@ -44,12 +44,6 @@ extern const struct device_type disk_type;
extern struct device_type part_type; extern struct device_type part_type;
extern struct class block_class; extern struct class block_class;
/* Must be consistent with blk_mq_poll_stats_bkt() */
#define BLK_MQ_POLL_STATS_BKTS 16
/* Doing classic polling */
#define BLK_MQ_POLL_CLASSIC -1
/* /*
* Maximum number of blkcg policies allowed to be registered concurrently. * Maximum number of blkcg policies allowed to be registered concurrently.
* Defined here to simplify include dependency. * Defined here to simplify include dependency.
...@@ -468,10 +462,6 @@ struct request_queue { ...@@ -468,10 +462,6 @@ struct request_queue {
#endif #endif
unsigned int rq_timeout; unsigned int rq_timeout;
int poll_nsec;
struct blk_stat_callback *poll_cb;
struct blk_rq_stat *poll_stat;
struct timer_list timeout; struct timer_list timeout;
struct work_struct timeout_work; struct work_struct timeout_work;
...@@ -870,8 +860,6 @@ blk_status_t errno_to_blk_status(int errno); ...@@ -870,8 +860,6 @@ blk_status_t errno_to_blk_status(int errno);
/* only poll the hardware once, don't continue until a completion was found */ /* only poll the hardware once, don't continue until a completion was found */
#define BLK_POLL_ONESHOT (1 << 0) #define BLK_POLL_ONESHOT (1 << 0)
/* do not sleep to wait for the expected completion time */
#define BLK_POLL_NOSLEEP (1 << 1)
int bio_poll(struct bio *bio, struct io_comp_batch *iob, unsigned int flags); int bio_poll(struct bio *bio, struct io_comp_batch *iob, unsigned int flags);
int iocb_bio_iopoll(struct kiocb *kiocb, struct io_comp_batch *iob, int iocb_bio_iopoll(struct kiocb *kiocb, struct io_comp_batch *iob,
unsigned int flags); unsigned int flags);
......
...@@ -1002,7 +1002,7 @@ void io_rw_fail(struct io_kiocb *req) ...@@ -1002,7 +1002,7 @@ void io_rw_fail(struct io_kiocb *req)
int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin) int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin)
{ {
struct io_wq_work_node *pos, *start, *prev; struct io_wq_work_node *pos, *start, *prev;
unsigned int poll_flags = BLK_POLL_NOSLEEP; unsigned int poll_flags = 0;
DEFINE_IO_COMP_BATCH(iob); DEFINE_IO_COMP_BATCH(iob);
int nr_events = 0; int nr_events = 0;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment