Commit 21c6e939 authored by Jens Axboe

blk-mq: unify hctx delay_work and run_work

The only difference between ->run_work and ->delay_work is that
the latter is used to defer running a queue. This is done by
marking the queue stopped, and scheduling ->delay_work to run
sometime in the future. While the queue is stopped, direct runs
or runs through ->run_work will not run the queue.

If we combine the handlers, then we need to handle two things:

1) If a delayed/stopped run is scheduled, then we should not run
   the queue before that has been completed.
2) If a queue is delayed/stopped, the handler needs to restart
   the queue. Normally a run of a queue with the stopped bit set
   would be a no-op.

Case 1 is handled by modifying a currently pending queue run
to the deadline set by the caller of blk_mq_delay_queue().
Subsequent attempts to queue a queue run will find the work
item already pending, and direct runs will see a stopped queue
as before.

Case 2 is handled by adding a new bit, BLK_MQ_S_START_ON_RUN,
that tells the work handler that it should clear a stopped
queue and run the handler.

Reviewed-by: Bart Van Assche <Bart.VanAssche@sandisk.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
parent 818cd1cb
...@@ -268,10 +268,8 @@ void blk_sync_queue(struct request_queue *q) ...@@ -268,10 +268,8 @@ void blk_sync_queue(struct request_queue *q)
struct blk_mq_hw_ctx *hctx; struct blk_mq_hw_ctx *hctx;
int i; int i;
queue_for_each_hw_ctx(q, hctx, i) { queue_for_each_hw_ctx(q, hctx, i)
cancel_delayed_work_sync(&hctx->run_work); cancel_delayed_work_sync(&hctx->run_work);
cancel_delayed_work_sync(&hctx->delay_work);
}
} else { } else {
cancel_delayed_work_sync(&q->delay_work); cancel_delayed_work_sync(&q->delay_work);
} }
......
...@@ -1221,7 +1221,6 @@ EXPORT_SYMBOL(blk_mq_queue_stopped); ...@@ -1221,7 +1221,6 @@ EXPORT_SYMBOL(blk_mq_queue_stopped);
void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx) void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx)
{ {
cancel_delayed_work_sync(&hctx->run_work); cancel_delayed_work_sync(&hctx->run_work);
cancel_delayed_work(&hctx->delay_work);
set_bit(BLK_MQ_S_STOPPED, &hctx->state); set_bit(BLK_MQ_S_STOPPED, &hctx->state);
} }
EXPORT_SYMBOL(blk_mq_stop_hw_queue); EXPORT_SYMBOL(blk_mq_stop_hw_queue);
...@@ -1279,27 +1278,39 @@ static void blk_mq_run_work_fn(struct work_struct *work) ...@@ -1279,27 +1278,39 @@ static void blk_mq_run_work_fn(struct work_struct *work)
struct blk_mq_hw_ctx *hctx; struct blk_mq_hw_ctx *hctx;
hctx = container_of(work, struct blk_mq_hw_ctx, run_work.work); hctx = container_of(work, struct blk_mq_hw_ctx, run_work.work);
__blk_mq_run_hw_queue(hctx);
}
static void blk_mq_delay_work_fn(struct work_struct *work) /*
{ * If we are stopped, don't run the queue. The exception is if
struct blk_mq_hw_ctx *hctx; * BLK_MQ_S_START_ON_RUN is set. For that case, we auto-clear
* the STOPPED bit and run it.
*/
if (test_bit(BLK_MQ_S_STOPPED, &hctx->state)) {
if (!test_bit(BLK_MQ_S_START_ON_RUN, &hctx->state))
return;
hctx = container_of(work, struct blk_mq_hw_ctx, delay_work.work); clear_bit(BLK_MQ_S_START_ON_RUN, &hctx->state);
clear_bit(BLK_MQ_S_STOPPED, &hctx->state);
}
if (test_and_clear_bit(BLK_MQ_S_STOPPED, &hctx->state))
__blk_mq_run_hw_queue(hctx); __blk_mq_run_hw_queue(hctx);
} }
void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs) void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs)
{ {
if (unlikely(!blk_mq_hw_queue_mapped(hctx))) if (unlikely(!blk_mq_hw_queue_mapped(hctx)))
return; return;
/*
* Stop the hw queue, then modify currently delayed work.
* This should prevent us from running the queue prematurely.
* Mark the queue as auto-clearing STOPPED when it runs.
*/
blk_mq_stop_hw_queue(hctx); blk_mq_stop_hw_queue(hctx);
kblockd_schedule_delayed_work_on(blk_mq_hctx_next_cpu(hctx), set_bit(BLK_MQ_S_START_ON_RUN, &hctx->state);
&hctx->delay_work, msecs_to_jiffies(msecs)); kblockd_mod_delayed_work_on(blk_mq_hctx_next_cpu(hctx),
&hctx->run_work,
msecs_to_jiffies(msecs));
} }
EXPORT_SYMBOL(blk_mq_delay_queue); EXPORT_SYMBOL(blk_mq_delay_queue);
...@@ -1885,7 +1896,6 @@ static int blk_mq_init_hctx(struct request_queue *q, ...@@ -1885,7 +1896,6 @@ static int blk_mq_init_hctx(struct request_queue *q,
node = hctx->numa_node = set->numa_node; node = hctx->numa_node = set->numa_node;
INIT_DELAYED_WORK(&hctx->run_work, blk_mq_run_work_fn); INIT_DELAYED_WORK(&hctx->run_work, blk_mq_run_work_fn);
INIT_DELAYED_WORK(&hctx->delay_work, blk_mq_delay_work_fn);
spin_lock_init(&hctx->lock); spin_lock_init(&hctx->lock);
INIT_LIST_HEAD(&hctx->dispatch); INIT_LIST_HEAD(&hctx->dispatch);
hctx->queue = q; hctx->queue = q;
......
...@@ -51,8 +51,6 @@ struct blk_mq_hw_ctx { ...@@ -51,8 +51,6 @@ struct blk_mq_hw_ctx {
atomic_t nr_active; atomic_t nr_active;
struct delayed_work delay_work;
struct hlist_node cpuhp_dead; struct hlist_node cpuhp_dead;
struct kobject kobj; struct kobject kobj;
...@@ -168,6 +166,7 @@ enum { ...@@ -168,6 +166,7 @@ enum {
BLK_MQ_S_TAG_ACTIVE = 1, BLK_MQ_S_TAG_ACTIVE = 1,
BLK_MQ_S_SCHED_RESTART = 2, BLK_MQ_S_SCHED_RESTART = 2,
BLK_MQ_S_TAG_WAITING = 3, BLK_MQ_S_TAG_WAITING = 3,
BLK_MQ_S_START_ON_RUN = 4,
BLK_MQ_MAX_DEPTH = 10240, BLK_MQ_MAX_DEPTH = 10240,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment