Commit 24bfd19b authored by Paolo Valente's avatar Paolo Valente Committed by Jens Axboe

block, bfq: update blkio stats outside the scheduler lock

bfq invokes various blkg_*stats_* functions to update the statistics
contained in the special files blkio.bfq.* in the blkio controller
groups, i.e., the I/O accounting related to the proportional-share
policy provided by bfq. The execution of these functions takes a
considerable percentage, about 40%, of the total per-request execution
time of bfq (i.e., of the sum of the execution time of all the bfq
functions that have to be executed to process an I/O request from its
creation to its destruction).  This reduces the request-processing
rate sustainable by bfq noticeably, even on a multicore CPU. In fact,
the bfq functions that invoke blkg_*stats_* functions cannot be
executed in parallel with the rest of the code of bfq, because both
are executed under the same same per-device scheduler lock.

To reduce this slowdown, this commit moves, wherever possible, the
invocation of these functions (more precisely, of the bfq functions
that invoke blkg_*stats_* functions) outside the critical sections
protected by the scheduler lock.

With this change, and with all blkio.bfq.* statistics enabled, the
throughput grows, e.g., from 250 to 310 KIOPS (+25%) on an Intel
i7-4850HQ, in case of 8 threads doing random I/O in parallel on
null_blk, with the latter configured with 0 latency. We obtained the
same or higher throughput boosts, up to +30%, with other processors
(some figures are reported in the documentation). For our tests, we
used the script [1], with which our results can be easily reproduced.

NOTE. This commit still protects the invocation of blkg_*stats_*
functions with the request_queue lock, because the group these
functions are invoked on may otherwise disappear before or while these
functions are executed.  Fortunately, tests without even this lock
show, by difference, that the serialization caused by this lock has a
little impact (at most ~5% of throughput reduction).

[1] https://github.com/Algodev-github/IOSpeedTested-by: default avatarLee Tibbert <lee.tibbert@gmail.com>
Tested-by: default avatarOleksandr Natalenko <oleksandr@natalenko.name>
Signed-off-by: default avatarPaolo Valente <paolo.valente@linaro.org>
Signed-off-by: default avatarLuca Miccio <lucmiccio@gmail.com>
Signed-off-by: default avatarJens Axboe <axboe@kernel.dk>
parent 614822f8
...@@ -26,9 +26,9 @@ the limits on slow or average CPUs, here are BFQ limits for three ...@@ -26,9 +26,9 @@ the limits on slow or average CPUs, here are BFQ limits for three
different CPUs, on, respectively, an average laptop, an old desktop, different CPUs, on, respectively, an average laptop, an old desktop,
and a cheap embedded system, in case full hierarchical support is and a cheap embedded system, in case full hierarchical support is
enabled (i.e., CONFIG_BFQ_GROUP_IOSCHED is set): enabled (i.e., CONFIG_BFQ_GROUP_IOSCHED is set):
- Intel i7-4850HQ: 250 KIOPS - Intel i7-4850HQ: 310 KIOPS
- AMD A8-3850: 170 KIOPS - AMD A8-3850: 200 KIOPS
- ARM CortexTM-A53 Octa-core: 45 KIOPS - ARM CortexTM-A53 Octa-core: 56 KIOPS
BFQ works for multi-queue devices too. BFQ works for multi-queue devices too.
......
...@@ -2228,7 +2228,6 @@ static void __bfq_set_in_service_queue(struct bfq_data *bfqd, ...@@ -2228,7 +2228,6 @@ static void __bfq_set_in_service_queue(struct bfq_data *bfqd,
struct bfq_queue *bfqq) struct bfq_queue *bfqq)
{ {
if (bfqq) { if (bfqq) {
bfqg_stats_update_avg_queue_size(bfqq_group(bfqq));
bfq_clear_bfqq_fifo_expire(bfqq); bfq_clear_bfqq_fifo_expire(bfqq);
bfqd->budgets_assigned = (bfqd->budgets_assigned * 7 + 256) / 8; bfqd->budgets_assigned = (bfqd->budgets_assigned * 7 + 256) / 8;
...@@ -3469,7 +3468,6 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd) ...@@ -3469,7 +3468,6 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd)
*/ */
bfq_clear_bfqq_wait_request(bfqq); bfq_clear_bfqq_wait_request(bfqq);
hrtimer_try_to_cancel(&bfqd->idle_slice_timer); hrtimer_try_to_cancel(&bfqd->idle_slice_timer);
bfqg_stats_update_idle_time(bfqq_group(bfqq));
} }
goto keep_queue; goto keep_queue;
} }
...@@ -3695,15 +3693,67 @@ static struct request *bfq_dispatch_request(struct blk_mq_hw_ctx *hctx) ...@@ -3695,15 +3693,67 @@ static struct request *bfq_dispatch_request(struct blk_mq_hw_ctx *hctx)
{ {
struct bfq_data *bfqd = hctx->queue->elevator->elevator_data; struct bfq_data *bfqd = hctx->queue->elevator->elevator_data;
struct request *rq; struct request *rq;
#ifdef CONFIG_BFQ_GROUP_IOSCHED
struct bfq_queue *in_serv_queue, *bfqq;
bool waiting_rq, idle_timer_disabled;
#endif
spin_lock_irq(&bfqd->lock); spin_lock_irq(&bfqd->lock);
#ifdef CONFIG_BFQ_GROUP_IOSCHED
in_serv_queue = bfqd->in_service_queue;
waiting_rq = in_serv_queue && bfq_bfqq_wait_request(in_serv_queue);
rq = __bfq_dispatch_request(hctx); rq = __bfq_dispatch_request(hctx);
if (rq && RQ_BFQQ(rq))
bfqg_stats_update_io_remove(bfqq_group(RQ_BFQQ(rq)), idle_timer_disabled =
rq->cmd_flags); waiting_rq && !bfq_bfqq_wait_request(in_serv_queue);
#else
rq = __bfq_dispatch_request(hctx);
#endif
spin_unlock_irq(&bfqd->lock); spin_unlock_irq(&bfqd->lock);
#ifdef CONFIG_BFQ_GROUP_IOSCHED
bfqq = rq ? RQ_BFQQ(rq) : NULL;
if (!idle_timer_disabled && !bfqq)
return rq;
/*
* rq and bfqq are guaranteed to exist until this function
* ends, for the following reasons. First, rq can be
* dispatched to the device, and then can be completed and
* freed, only after this function ends. Second, rq cannot be
* merged (and thus freed because of a merge) any longer,
* because it has already started. Thus rq cannot be freed
* before this function ends, and, since rq has a reference to
* bfqq, the same guarantee holds for bfqq too.
*
* In addition, the following queue lock guarantees that
* bfqq_group(bfqq) exists as well.
*/
spin_lock_irq(hctx->queue->queue_lock);
if (idle_timer_disabled)
/*
* Since the idle timer has been disabled,
* in_serv_queue contained some request when
* __bfq_dispatch_request was invoked above, which
* implies that rq was picked exactly from
* in_serv_queue. Thus in_serv_queue == bfqq, and is
* therefore guaranteed to exist because of the above
* arguments.
*/
bfqg_stats_update_idle_time(bfqq_group(in_serv_queue));
if (bfqq) {
struct bfq_group *bfqg = bfqq_group(bfqq);
bfqg_stats_update_avg_queue_size(bfqg);
bfqg_stats_set_start_empty_time(bfqg);
bfqg_stats_update_io_remove(bfqg, rq->cmd_flags);
}
spin_unlock_irq(hctx->queue->queue_lock);
#endif
return rq; return rq;
} }
...@@ -4161,7 +4211,6 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq, ...@@ -4161,7 +4211,6 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq,
*/ */
bfq_clear_bfqq_wait_request(bfqq); bfq_clear_bfqq_wait_request(bfqq);
hrtimer_try_to_cancel(&bfqd->idle_slice_timer); hrtimer_try_to_cancel(&bfqd->idle_slice_timer);
bfqg_stats_update_idle_time(bfqq_group(bfqq));
/* /*
* The queue is not empty, because a new request just * The queue is not empty, because a new request just
...@@ -4176,10 +4225,12 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq, ...@@ -4176,10 +4225,12 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq,
} }
} }
static void __bfq_insert_request(struct bfq_data *bfqd, struct request *rq) /* returns true if it causes the idle timer to be disabled */
static bool __bfq_insert_request(struct bfq_data *bfqd, struct request *rq)
{ {
struct bfq_queue *bfqq = RQ_BFQQ(rq), struct bfq_queue *bfqq = RQ_BFQQ(rq),
*new_bfqq = bfq_setup_cooperator(bfqd, bfqq, rq, true); *new_bfqq = bfq_setup_cooperator(bfqd, bfqq, rq, true);
bool waiting, idle_timer_disabled = false;
if (new_bfqq) { if (new_bfqq) {
if (bic_to_bfqq(RQ_BIC(rq), 1) != bfqq) if (bic_to_bfqq(RQ_BIC(rq), 1) != bfqq)
...@@ -4213,12 +4264,16 @@ static void __bfq_insert_request(struct bfq_data *bfqd, struct request *rq) ...@@ -4213,12 +4264,16 @@ static void __bfq_insert_request(struct bfq_data *bfqd, struct request *rq)
bfqq = new_bfqq; bfqq = new_bfqq;
} }
waiting = bfqq && bfq_bfqq_wait_request(bfqq);
bfq_add_request(rq); bfq_add_request(rq);
idle_timer_disabled = waiting && !bfq_bfqq_wait_request(bfqq);
rq->fifo_time = ktime_get_ns() + bfqd->bfq_fifo_expire[rq_is_sync(rq)]; rq->fifo_time = ktime_get_ns() + bfqd->bfq_fifo_expire[rq_is_sync(rq)];
list_add_tail(&rq->queuelist, &bfqq->fifo); list_add_tail(&rq->queuelist, &bfqq->fifo);
bfq_rq_enqueued(bfqd, bfqq, rq); bfq_rq_enqueued(bfqd, bfqq, rq);
return idle_timer_disabled;
} }
static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
...@@ -4226,7 +4281,11 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, ...@@ -4226,7 +4281,11 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
{ {
struct request_queue *q = hctx->queue; struct request_queue *q = hctx->queue;
struct bfq_data *bfqd = q->elevator->elevator_data; struct bfq_data *bfqd = q->elevator->elevator_data;
#ifdef CONFIG_BFQ_GROUP_IOSCHED
struct bfq_queue *bfqq = RQ_BFQQ(rq); struct bfq_queue *bfqq = RQ_BFQQ(rq);
bool idle_timer_disabled = false;
unsigned int cmd_flags;
#endif
spin_lock_irq(&bfqd->lock); spin_lock_irq(&bfqd->lock);
if (blk_mq_sched_try_insert_merge(q, rq)) { if (blk_mq_sched_try_insert_merge(q, rq)) {
...@@ -4245,13 +4304,17 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, ...@@ -4245,13 +4304,17 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
else else
list_add_tail(&rq->queuelist, &bfqd->dispatch); list_add_tail(&rq->queuelist, &bfqd->dispatch);
} else { } else {
__bfq_insert_request(bfqd, rq); #ifdef CONFIG_BFQ_GROUP_IOSCHED
idle_timer_disabled = __bfq_insert_request(bfqd, rq);
/* /*
* Update bfqq, because, if a queue merge has occurred * Update bfqq, because, if a queue merge has occurred
* in __bfq_insert_request, then rq has been * in __bfq_insert_request, then rq has been
* redirected into a new queue. * redirected into a new queue.
*/ */
bfqq = RQ_BFQQ(rq); bfqq = RQ_BFQQ(rq);
#else
__bfq_insert_request(bfqd, rq);
#endif
if (rq_mergeable(rq)) { if (rq_mergeable(rq)) {
elv_rqhash_add(q, rq); elv_rqhash_add(q, rq);
...@@ -4260,10 +4323,35 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, ...@@ -4260,10 +4323,35 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
} }
} }
if (bfqq) #ifdef CONFIG_BFQ_GROUP_IOSCHED
bfqg_stats_update_io_add(bfqq_group(bfqq), bfqq, rq->cmd_flags); /*
* Cache cmd_flags before releasing scheduler lock, because rq
* may disappear afterwards (for example, because of a request
* merge).
*/
cmd_flags = rq->cmd_flags;
#endif
spin_unlock_irq(&bfqd->lock); spin_unlock_irq(&bfqd->lock);
#ifdef CONFIG_BFQ_GROUP_IOSCHED
if (!bfqq)
return;
/*
* bfqq still exists, because it can disappear only after
* either it is merged with another queue, or the process it
* is associated with exits. But both actions must be taken by
* the same process currently executing this flow of
* instruction.
*
* In addition, the following queue lock guarantees that
* bfqq_group(bfqq) exists as well.
*/
spin_lock_irq(q->queue_lock);
bfqg_stats_update_io_add(bfqq_group(bfqq), bfqq, cmd_flags);
if (idle_timer_disabled)
bfqg_stats_update_idle_time(bfqq_group(bfqq));
spin_unlock_irq(q->queue_lock);
#endif
} }
static void bfq_insert_requests(struct blk_mq_hw_ctx *hctx, static void bfq_insert_requests(struct blk_mq_hw_ctx *hctx,
......
...@@ -843,7 +843,6 @@ void bfq_bfqq_served(struct bfq_queue *bfqq, int served) ...@@ -843,7 +843,6 @@ void bfq_bfqq_served(struct bfq_queue *bfqq, int served)
st->vtime += bfq_delta(served, st->wsum); st->vtime += bfq_delta(served, st->wsum);
bfq_forget_idle(st); bfq_forget_idle(st);
} }
bfqg_stats_set_start_empty_time(bfqq_group(bfqq));
bfq_log_bfqq(bfqq->bfqd, bfqq, "bfqq_served %d secs", served); bfq_log_bfqq(bfqq->bfqd, bfqq, "bfqq_served %d secs", served);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment