Commit 06ede5f6 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'for-linus' of git://git.kernel.dk/linux-block

Pull more block layer updates from Jens Axboe:
 "A followup pull request, with some parts that either needed a bit more
  testing before going in, merge sync, or just later arriving fixes.
  This contains:

   - Timer related updates from Kees. These were purposefully delayed
     since I didn't want to pull in a later v4.14-rc tag to my block
     tree.

   - ide-cd prep sense buffer fix from Bart. Also delayed, as not to
     clash with the late fix we put into 4.14-rc.

   - Small BFQ updates series from Luca and Paolo.

   - Single nvmet fix from James, fixing a non-functional case there.

   - Bio fast clone fix from Michael, which made bcache return the wrong
     data for some cases.

   - Legacy IO path regression hang fix from Ming"

* 'for-linus' of git://git.kernel.dk/linux-block:
  bio: ensure __bio_clone_fast copies bi_partno
  nvmet_fc: fix better length checking
  block: wake up all tasks blocked in get_request()
  block, bfq: move debug blkio stats behind CONFIG_DEBUG_BLK_CGROUP
  block, bfq: update blkio stats outside the scheduler lock
  block, bfq: add missing invocations of bfqg_stats_update_io_add/remove
  doc, block, bfq: update max IOPS sustainable with BFQ
  ide: Make ide_cdrom_prep_fs() initialize the sense buffer pointer
  md: Convert timers to use timer_setup()
  block: swim3: Convert timers to use timer_setup()
  block/aoe: Convert timers to use timer_setup()
  amifloppy: Convert timers to use timer_setup()
  block/floppy: Convert callback to pass timer_list
parents a3841f94 62530ed8
......@@ -20,12 +20,27 @@ for that device, by setting low_latency to 0. See Section 3 for
details on how to configure BFQ for the desired tradeoff between
latency and throughput, or on how to maximize throughput.
On average CPUs, the current version of BFQ can handle devices
performing at most ~30K IOPS; at most ~50 KIOPS on faster CPUs. As a
reference, 30-50 KIOPS correspond to very high bandwidths with
sequential I/O (e.g., 8-12 GB/s if I/O requests are 256 KB large), and
to 120-200 MB/s with 4KB random I/O. BFQ is currently being tested on
multi-queue devices too.
BFQ has a non-null overhead, which limits the maximum IOPS that a CPU
can process for a device scheduled with BFQ. To give an idea of the
limits on slow or average CPUs, here are, first, the limits of BFQ for
three different CPUs, on, respectively, an average laptop, an old
desktop, and a cheap embedded system, in case full hierarchical
support is enabled (i.e., CONFIG_BFQ_GROUP_IOSCHED is set), but
CONFIG_DEBUG_BLK_CGROUP is not set (Section 4-2):
- Intel i7-4850HQ: 400 KIOPS
- AMD A8-3850: 250 KIOPS
- ARM CortexTM-A53 Octa-core: 80 KIOPS
If CONFIG_DEBUG_BLK_CGROUP is set (and of course full hierarchical
support is enabled), then the sustainable throughput with BFQ
decreases, because all blkio.bfq* statistics are created and updated
(Section 4-2). For BFQ, this leads to the following maximum
sustainable throughputs, on the same systems as above:
- Intel i7-4850HQ: 310 KIOPS
- AMD A8-3850: 200 KIOPS
- ARM CortexTM-A53 Octa-core: 56 KIOPS
BFQ works for multi-queue devices too.
The table of contents follow. Impatients can just jump to Section 3.
......@@ -500,6 +515,22 @@ BFQ-specific files is "blkio.bfq." or "io.bfq." For example, the group
parameter to set the weight of a group with BFQ is blkio.bfq.weight
or io.bfq.weight.
As for cgroups-v1 (blkio controller), the exact set of stat files
created, and kept up-to-date by bfq, depends on whether
CONFIG_DEBUG_BLK_CGROUP is set. If it is set, then bfq creates all
the stat files documented in
Documentation/cgroup-v1/blkio-controller.txt. If, instead,
CONFIG_DEBUG_BLK_CGROUP is not set, then bfq creates only the files
blkio.bfq.io_service_bytes
blkio.bfq.io_service_bytes_recursive
blkio.bfq.io_serviced
blkio.bfq.io_serviced_recursive
The value of CONFIG_DEBUG_BLK_CGROUP greatly influences the maximum
throughput sustainable with bfq, because updating the blkio.bfq.*
stats is rather costly, especially for some of the stats enabled by
CONFIG_DEBUG_BLK_CGROUP.
Parameters to set
-----------------
......
......@@ -24,7 +24,7 @@
#include "bfq-iosched.h"
#ifdef CONFIG_BFQ_GROUP_IOSCHED
#if defined(CONFIG_BFQ_GROUP_IOSCHED) && defined(CONFIG_DEBUG_BLK_CGROUP)
/* bfqg stats flags */
enum bfqg_stats_flags {
......@@ -152,6 +152,57 @@ void bfqg_stats_update_avg_queue_size(struct bfq_group *bfqg)
bfqg_stats_update_group_wait_time(stats);
}
void bfqg_stats_update_io_add(struct bfq_group *bfqg, struct bfq_queue *bfqq,
unsigned int op)
{
blkg_rwstat_add(&bfqg->stats.queued, op, 1);
bfqg_stats_end_empty_time(&bfqg->stats);
if (!(bfqq == ((struct bfq_data *)bfqg->bfqd)->in_service_queue))
bfqg_stats_set_start_group_wait_time(bfqg, bfqq_group(bfqq));
}
void bfqg_stats_update_io_remove(struct bfq_group *bfqg, unsigned int op)
{
blkg_rwstat_add(&bfqg->stats.queued, op, -1);
}
void bfqg_stats_update_io_merged(struct bfq_group *bfqg, unsigned int op)
{
blkg_rwstat_add(&bfqg->stats.merged, op, 1);
}
void bfqg_stats_update_completion(struct bfq_group *bfqg, uint64_t start_time,
uint64_t io_start_time, unsigned int op)
{
struct bfqg_stats *stats = &bfqg->stats;
unsigned long long now = sched_clock();
if (time_after64(now, io_start_time))
blkg_rwstat_add(&stats->service_time, op,
now - io_start_time);
if (time_after64(io_start_time, start_time))
blkg_rwstat_add(&stats->wait_time, op,
io_start_time - start_time);
}
#else /* CONFIG_BFQ_GROUP_IOSCHED && CONFIG_DEBUG_BLK_CGROUP */
void bfqg_stats_update_io_add(struct bfq_group *bfqg, struct bfq_queue *bfqq,
unsigned int op) { }
void bfqg_stats_update_io_remove(struct bfq_group *bfqg, unsigned int op) { }
void bfqg_stats_update_io_merged(struct bfq_group *bfqg, unsigned int op) { }
void bfqg_stats_update_completion(struct bfq_group *bfqg, uint64_t start_time,
uint64_t io_start_time, unsigned int op) { }
void bfqg_stats_update_dequeue(struct bfq_group *bfqg) { }
void bfqg_stats_set_start_empty_time(struct bfq_group *bfqg) { }
void bfqg_stats_update_idle_time(struct bfq_group *bfqg) { }
void bfqg_stats_set_start_idle_time(struct bfq_group *bfqg) { }
void bfqg_stats_update_avg_queue_size(struct bfq_group *bfqg) { }
#endif /* CONFIG_BFQ_GROUP_IOSCHED && CONFIG_DEBUG_BLK_CGROUP */
#ifdef CONFIG_BFQ_GROUP_IOSCHED
/*
* blk-cgroup policy-related handlers
* The following functions help in converting between blk-cgroup
......@@ -229,42 +280,10 @@ void bfqg_and_blkg_put(struct bfq_group *bfqg)
blkg_put(bfqg_to_blkg(bfqg));
}
void bfqg_stats_update_io_add(struct bfq_group *bfqg, struct bfq_queue *bfqq,
unsigned int op)
{
blkg_rwstat_add(&bfqg->stats.queued, op, 1);
bfqg_stats_end_empty_time(&bfqg->stats);
if (!(bfqq == ((struct bfq_data *)bfqg->bfqd)->in_service_queue))
bfqg_stats_set_start_group_wait_time(bfqg, bfqq_group(bfqq));
}
void bfqg_stats_update_io_remove(struct bfq_group *bfqg, unsigned int op)
{
blkg_rwstat_add(&bfqg->stats.queued, op, -1);
}
void bfqg_stats_update_io_merged(struct bfq_group *bfqg, unsigned int op)
{
blkg_rwstat_add(&bfqg->stats.merged, op, 1);
}
void bfqg_stats_update_completion(struct bfq_group *bfqg, uint64_t start_time,
uint64_t io_start_time, unsigned int op)
{
struct bfqg_stats *stats = &bfqg->stats;
unsigned long long now = sched_clock();
if (time_after64(now, io_start_time))
blkg_rwstat_add(&stats->service_time, op,
now - io_start_time);
if (time_after64(io_start_time, start_time))
blkg_rwstat_add(&stats->wait_time, op,
io_start_time - start_time);
}
/* @stats = 0 */
static void bfqg_stats_reset(struct bfqg_stats *stats)
{
#ifdef CONFIG_DEBUG_BLK_CGROUP
/* queued stats shouldn't be cleared */
blkg_rwstat_reset(&stats->merged);
blkg_rwstat_reset(&stats->service_time);
......@@ -276,6 +295,7 @@ static void bfqg_stats_reset(struct bfqg_stats *stats)
blkg_stat_reset(&stats->group_wait_time);
blkg_stat_reset(&stats->idle_time);
blkg_stat_reset(&stats->empty_time);
#endif
}
/* @to += @from */
......@@ -284,6 +304,7 @@ static void bfqg_stats_add_aux(struct bfqg_stats *to, struct bfqg_stats *from)
if (!to || !from)
return;
#ifdef CONFIG_DEBUG_BLK_CGROUP
/* queued stats shouldn't be cleared */
blkg_rwstat_add_aux(&to->merged, &from->merged);
blkg_rwstat_add_aux(&to->service_time, &from->service_time);
......@@ -296,6 +317,7 @@ static void bfqg_stats_add_aux(struct bfqg_stats *to, struct bfqg_stats *from)
blkg_stat_add_aux(&to->group_wait_time, &from->group_wait_time);
blkg_stat_add_aux(&to->idle_time, &from->idle_time);
blkg_stat_add_aux(&to->empty_time, &from->empty_time);
#endif
}
/*
......@@ -342,6 +364,7 @@ void bfq_init_entity(struct bfq_entity *entity, struct bfq_group *bfqg)
static void bfqg_stats_exit(struct bfqg_stats *stats)
{
#ifdef CONFIG_DEBUG_BLK_CGROUP
blkg_rwstat_exit(&stats->merged);
blkg_rwstat_exit(&stats->service_time);
blkg_rwstat_exit(&stats->wait_time);
......@@ -353,10 +376,12 @@ static void bfqg_stats_exit(struct bfqg_stats *stats)
blkg_stat_exit(&stats->group_wait_time);
blkg_stat_exit(&stats->idle_time);
blkg_stat_exit(&stats->empty_time);
#endif
}
static int bfqg_stats_init(struct bfqg_stats *stats, gfp_t gfp)
{
#ifdef CONFIG_DEBUG_BLK_CGROUP
if (blkg_rwstat_init(&stats->merged, gfp) ||
blkg_rwstat_init(&stats->service_time, gfp) ||
blkg_rwstat_init(&stats->wait_time, gfp) ||
......@@ -371,6 +396,7 @@ static int bfqg_stats_init(struct bfqg_stats *stats, gfp_t gfp)
bfqg_stats_exit(stats);
return -ENOMEM;
}
#endif
return 0;
}
......@@ -887,6 +913,7 @@ static ssize_t bfq_io_set_weight(struct kernfs_open_file *of,
return bfq_io_set_weight_legacy(of_css(of), NULL, weight);
}
#ifdef CONFIG_DEBUG_BLK_CGROUP
static int bfqg_print_stat(struct seq_file *sf, void *v)
{
blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), blkg_prfill_stat,
......@@ -991,6 +1018,7 @@ static int bfqg_print_avg_queue_size(struct seq_file *sf, void *v)
0, false);
return 0;
}
#endif /* CONFIG_DEBUG_BLK_CGROUP */
struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node)
{
......@@ -1028,15 +1056,6 @@ struct cftype bfq_blkcg_legacy_files[] = {
},
/* statistics, covers only the tasks in the bfqg */
{
.name = "bfq.time",
.private = offsetof(struct bfq_group, stats.time),
.seq_show = bfqg_print_stat,
},
{
.name = "bfq.sectors",
.seq_show = bfqg_print_stat_sectors,
},
{
.name = "bfq.io_service_bytes",
.private = (unsigned long)&blkcg_policy_bfq,
......@@ -1047,6 +1066,16 @@ struct cftype bfq_blkcg_legacy_files[] = {
.private = (unsigned long)&blkcg_policy_bfq,
.seq_show = blkg_print_stat_ios,
},
#ifdef CONFIG_DEBUG_BLK_CGROUP
{
.name = "bfq.time",
.private = offsetof(struct bfq_group, stats.time),
.seq_show = bfqg_print_stat,
},
{
.name = "bfq.sectors",
.seq_show = bfqg_print_stat_sectors,
},
{
.name = "bfq.io_service_time",
.private = offsetof(struct bfq_group, stats.service_time),
......@@ -1067,17 +1096,9 @@ struct cftype bfq_blkcg_legacy_files[] = {
.private = offsetof(struct bfq_group, stats.queued),
.seq_show = bfqg_print_rwstat,
},
#endif /* CONFIG_DEBUG_BLK_CGROUP */
/* the same statictics which cover the bfqg and its descendants */
{
.name = "bfq.time_recursive",
.private = offsetof(struct bfq_group, stats.time),
.seq_show = bfqg_print_stat_recursive,
},
{
.name = "bfq.sectors_recursive",
.seq_show = bfqg_print_stat_sectors_recursive,
},
{
.name = "bfq.io_service_bytes_recursive",
.private = (unsigned long)&blkcg_policy_bfq,
......@@ -1088,6 +1109,16 @@ struct cftype bfq_blkcg_legacy_files[] = {
.private = (unsigned long)&blkcg_policy_bfq,
.seq_show = blkg_print_stat_ios_recursive,
},
#ifdef CONFIG_DEBUG_BLK_CGROUP
{
.name = "bfq.time_recursive",
.private = offsetof(struct bfq_group, stats.time),
.seq_show = bfqg_print_stat_recursive,
},
{
.name = "bfq.sectors_recursive",
.seq_show = bfqg_print_stat_sectors_recursive,
},
{
.name = "bfq.io_service_time_recursive",
.private = offsetof(struct bfq_group, stats.service_time),
......@@ -1132,6 +1163,7 @@ struct cftype bfq_blkcg_legacy_files[] = {
.private = offsetof(struct bfq_group, stats.dequeue),
.seq_show = bfqg_print_stat,
},
#endif /* CONFIG_DEBUG_BLK_CGROUP */
{ } /* terminate */
};
......@@ -1147,18 +1179,6 @@ struct cftype bfq_blkg_files[] = {
#else /* CONFIG_BFQ_GROUP_IOSCHED */
void bfqg_stats_update_io_add(struct bfq_group *bfqg, struct bfq_queue *bfqq,
unsigned int op) { }
void bfqg_stats_update_io_remove(struct bfq_group *bfqg, unsigned int op) { }
void bfqg_stats_update_io_merged(struct bfq_group *bfqg, unsigned int op) { }
void bfqg_stats_update_completion(struct bfq_group *bfqg, uint64_t start_time,
uint64_t io_start_time, unsigned int op) { }
void bfqg_stats_update_dequeue(struct bfq_group *bfqg) { }
void bfqg_stats_set_start_empty_time(struct bfq_group *bfqg) { }
void bfqg_stats_update_idle_time(struct bfq_group *bfqg) { }
void bfqg_stats_set_start_idle_time(struct bfq_group *bfqg) { }
void bfqg_stats_update_avg_queue_size(struct bfq_group *bfqg) { }
void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
struct bfq_group *bfqg) {}
......
......@@ -1359,7 +1359,6 @@ static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd,
bfqq->ttime.last_end_request +
bfqd->bfq_slice_idle * 3;
bfqg_stats_update_io_add(bfqq_group(RQ_BFQQ(rq)), bfqq, rq->cmd_flags);
/*
* bfqq deserves to be weight-raised if:
......@@ -1633,7 +1632,6 @@ static void bfq_remove_request(struct request_queue *q,
if (rq->cmd_flags & REQ_META)
bfqq->meta_pending--;
bfqg_stats_update_io_remove(bfqq_group(bfqq), rq->cmd_flags);
}
static bool bfq_bio_merge(struct blk_mq_hw_ctx *hctx, struct bio *bio)
......@@ -1746,6 +1744,7 @@ static void bfq_requests_merged(struct request_queue *q, struct request *rq,
bfqq->next_rq = rq;
bfq_remove_request(q, next);
bfqg_stats_update_io_remove(bfqq_group(bfqq), next->cmd_flags);
spin_unlock_irq(&bfqq->bfqd->lock);
end:
......@@ -2229,7 +2228,6 @@ static void __bfq_set_in_service_queue(struct bfq_data *bfqd,
struct bfq_queue *bfqq)
{
if (bfqq) {
bfqg_stats_update_avg_queue_size(bfqq_group(bfqq));
bfq_clear_bfqq_fifo_expire(bfqq);
bfqd->budgets_assigned = (bfqd->budgets_assigned * 7 + 256) / 8;
......@@ -3470,7 +3468,6 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd)
*/
bfq_clear_bfqq_wait_request(bfqq);
hrtimer_try_to_cancel(&bfqd->idle_slice_timer);
bfqg_stats_update_idle_time(bfqq_group(bfqq));
}
goto keep_queue;
}
......@@ -3696,12 +3693,67 @@ static struct request *bfq_dispatch_request(struct blk_mq_hw_ctx *hctx)
{
struct bfq_data *bfqd = hctx->queue->elevator->elevator_data;
struct request *rq;
#if defined(CONFIG_BFQ_GROUP_IOSCHED) && defined(CONFIG_DEBUG_BLK_CGROUP)
struct bfq_queue *in_serv_queue, *bfqq;
bool waiting_rq, idle_timer_disabled;
#endif
spin_lock_irq(&bfqd->lock);
#if defined(CONFIG_BFQ_GROUP_IOSCHED) && defined(CONFIG_DEBUG_BLK_CGROUP)
in_serv_queue = bfqd->in_service_queue;
waiting_rq = in_serv_queue && bfq_bfqq_wait_request(in_serv_queue);
rq = __bfq_dispatch_request(hctx);
idle_timer_disabled =
waiting_rq && !bfq_bfqq_wait_request(in_serv_queue);
#else
rq = __bfq_dispatch_request(hctx);
#endif
spin_unlock_irq(&bfqd->lock);
#if defined(CONFIG_BFQ_GROUP_IOSCHED) && defined(CONFIG_DEBUG_BLK_CGROUP)
bfqq = rq ? RQ_BFQQ(rq) : NULL;
if (!idle_timer_disabled && !bfqq)
return rq;
/*
* rq and bfqq are guaranteed to exist until this function
* ends, for the following reasons. First, rq can be
* dispatched to the device, and then can be completed and
* freed, only after this function ends. Second, rq cannot be
* merged (and thus freed because of a merge) any longer,
* because it has already started. Thus rq cannot be freed
* before this function ends, and, since rq has a reference to
* bfqq, the same guarantee holds for bfqq too.
*
* In addition, the following queue lock guarantees that
* bfqq_group(bfqq) exists as well.
*/
spin_lock_irq(hctx->queue->queue_lock);
if (idle_timer_disabled)
/*
* Since the idle timer has been disabled,
* in_serv_queue contained some request when
* __bfq_dispatch_request was invoked above, which
* implies that rq was picked exactly from
* in_serv_queue. Thus in_serv_queue == bfqq, and is
* therefore guaranteed to exist because of the above
* arguments.
*/
bfqg_stats_update_idle_time(bfqq_group(in_serv_queue));
if (bfqq) {
struct bfq_group *bfqg = bfqq_group(bfqq);
bfqg_stats_update_avg_queue_size(bfqg);
bfqg_stats_set_start_empty_time(bfqg);
bfqg_stats_update_io_remove(bfqg, rq->cmd_flags);
}
spin_unlock_irq(hctx->queue->queue_lock);
#endif
return rq;
}
......@@ -4159,7 +4211,6 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq,
*/
bfq_clear_bfqq_wait_request(bfqq);
hrtimer_try_to_cancel(&bfqd->idle_slice_timer);
bfqg_stats_update_idle_time(bfqq_group(bfqq));
/*
* The queue is not empty, because a new request just
......@@ -4174,10 +4225,12 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq,
}
}
static void __bfq_insert_request(struct bfq_data *bfqd, struct request *rq)
/* returns true if it causes the idle timer to be disabled */
static bool __bfq_insert_request(struct bfq_data *bfqd, struct request *rq)
{
struct bfq_queue *bfqq = RQ_BFQQ(rq),
*new_bfqq = bfq_setup_cooperator(bfqd, bfqq, rq, true);
bool waiting, idle_timer_disabled = false;
if (new_bfqq) {
if (bic_to_bfqq(RQ_BIC(rq), 1) != bfqq)
......@@ -4211,12 +4264,16 @@ static void __bfq_insert_request(struct bfq_data *bfqd, struct request *rq)
bfqq = new_bfqq;
}
waiting = bfqq && bfq_bfqq_wait_request(bfqq);
bfq_add_request(rq);
idle_timer_disabled = waiting && !bfq_bfqq_wait_request(bfqq);
rq->fifo_time = ktime_get_ns() + bfqd->bfq_fifo_expire[rq_is_sync(rq)];
list_add_tail(&rq->queuelist, &bfqq->fifo);
bfq_rq_enqueued(bfqd, bfqq, rq);
return idle_timer_disabled;
}
static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
......@@ -4224,6 +4281,11 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
{
struct request_queue *q = hctx->queue;
struct bfq_data *bfqd = q->elevator->elevator_data;
#if defined(CONFIG_BFQ_GROUP_IOSCHED) && defined(CONFIG_DEBUG_BLK_CGROUP)
struct bfq_queue *bfqq = RQ_BFQQ(rq);
bool idle_timer_disabled = false;
unsigned int cmd_flags;
#endif
spin_lock_irq(&bfqd->lock);
if (blk_mq_sched_try_insert_merge(q, rq)) {
......@@ -4242,7 +4304,17 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
else
list_add_tail(&rq->queuelist, &bfqd->dispatch);
} else {
#if defined(CONFIG_BFQ_GROUP_IOSCHED) && defined(CONFIG_DEBUG_BLK_CGROUP)
idle_timer_disabled = __bfq_insert_request(bfqd, rq);
/*
* Update bfqq, because, if a queue merge has occurred
* in __bfq_insert_request, then rq has been
* redirected into a new queue.
*/
bfqq = RQ_BFQQ(rq);
#else
__bfq_insert_request(bfqd, rq);
#endif
if (rq_mergeable(rq)) {
elv_rqhash_add(q, rq);
......@@ -4251,7 +4323,35 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
}
}
#if defined(CONFIG_BFQ_GROUP_IOSCHED) && defined(CONFIG_DEBUG_BLK_CGROUP)
/*
* Cache cmd_flags before releasing scheduler lock, because rq
* may disappear afterwards (for example, because of a request
* merge).
*/
cmd_flags = rq->cmd_flags;
#endif
spin_unlock_irq(&bfqd->lock);
#if defined(CONFIG_BFQ_GROUP_IOSCHED) && defined(CONFIG_DEBUG_BLK_CGROUP)
if (!bfqq)
return;
/*
* bfqq still exists, because it can disappear only after
* either it is merged with another queue, or the process it
* is associated with exits. But both actions must be taken by
* the same process currently executing this flow of
* instruction.
*
* In addition, the following queue lock guarantees that
* bfqq_group(bfqq) exists as well.
*/
spin_lock_irq(q->queue_lock);
bfqg_stats_update_io_add(bfqq_group(bfqq), bfqq, cmd_flags);
if (idle_timer_disabled)
bfqg_stats_update_idle_time(bfqq_group(bfqq));
spin_unlock_irq(q->queue_lock);
#endif
}
static void bfq_insert_requests(struct blk_mq_hw_ctx *hctx,
......@@ -4428,8 +4528,11 @@ static void bfq_finish_request(struct request *rq)
* lock is held.
*/
if (!RB_EMPTY_NODE(&rq->rb_node))
if (!RB_EMPTY_NODE(&rq->rb_node)) {
bfq_remove_request(rq->q, rq);
bfqg_stats_update_io_remove(bfqq_group(bfqq),
rq->cmd_flags);
}
bfq_put_rq_priv_body(bfqq);
}
......
......@@ -689,7 +689,7 @@ enum bfqq_expiration {
};
struct bfqg_stats {
#ifdef CONFIG_BFQ_GROUP_IOSCHED
#if defined(CONFIG_BFQ_GROUP_IOSCHED) && defined(CONFIG_DEBUG_BLK_CGROUP)
/* number of ios merged */
struct blkg_rwstat merged;
/* total time spent on device in ns, may not be accurate w/ queueing */
......@@ -717,7 +717,7 @@ struct bfqg_stats {
uint64_t start_idle_time;
uint64_t start_empty_time;
uint16_t flags;
#endif /* CONFIG_BFQ_GROUP_IOSCHED */
#endif /* CONFIG_BFQ_GROUP_IOSCHED && CONFIG_DEBUG_BLK_CGROUP */
};
#ifdef CONFIG_BFQ_GROUP_IOSCHED
......
......@@ -843,7 +843,6 @@ void bfq_bfqq_served(struct bfq_queue *bfqq, int served)
st->vtime += bfq_delta(served, st->wsum);
bfq_forget_idle(st);
}
bfqg_stats_set_start_empty_time(bfqq_group(bfqq));
bfq_log_bfqq(bfqq->bfqd, bfqq, "bfqq_served %d secs", served);
}
......
......@@ -597,6 +597,7 @@ void __bio_clone_fast(struct bio *bio, struct bio *bio_src)
* so we don't set nor calculate new physical/hw segment counts here
*/
bio->bi_disk = bio_src->bi_disk;
bio->bi_partno = bio_src->bi_partno;
bio_set_flag(bio, BIO_CLONED);
bio->bi_opf = bio_src->bi_opf;
bio->bi_write_hint = bio_src->bi_write_hint;
......
......@@ -637,8 +637,8 @@ void blk_set_queue_dying(struct request_queue *q)
spin_lock_irq(q->queue_lock);
blk_queue_for_each_rl(rl, q) {
if (rl->rq_pool) {
wake_up(&rl->wait[BLK_RW_SYNC]);
wake_up(&rl->wait[BLK_RW_ASYNC]);
wake_up_all(&rl->wait[BLK_RW_SYNC]);
wake_up_all(&rl->wait[BLK_RW_ASYNC]);
}
}
spin_unlock_irq(q->queue_lock);
......
......@@ -146,6 +146,7 @@ static struct amiga_floppy_struct unit[FD_MAX_UNITS];
static struct timer_list flush_track_timer[FD_MAX_UNITS];
static struct timer_list post_write_timer;
static unsigned long post_write_timer_drive;
static struct timer_list motor_on_timer;
static struct timer_list motor_off_timer[FD_MAX_UNITS];
static int on_attempts;
......@@ -323,7 +324,7 @@ static void fd_deselect (int drive)
}
static void motor_on_callback(unsigned long ignored)
static void motor_on_callback(struct timer_list *unused)
{
if (!(ciaa.pra & DSKRDY) || --on_attempts == 0) {
complete_all(&motor_on_completion);
......@@ -355,7 +356,7 @@ static int fd_motor_on(int nr)
on_attempts = -1;
#if 0
printk (KERN_ERR "motor_on failed, turning motor off\n");
fd_motor_off (nr);
fd_motor_off (motor_off_timer + nr);
return 0;
#else
printk (KERN_WARNING "DSKRDY not set after 1.5 seconds - assuming drive is spinning notwithstanding\n");
......@@ -365,20 +366,17 @@ static int fd_motor_on(int nr)
return 1;
}
static void fd_motor_off(unsigned long drive)
static void fd_motor_off(struct timer_list *timer)
{
long calledfromint;
#ifdef MODULE
long decusecount;
unsigned long drive = ((unsigned long)timer -
(unsigned long)&motor_off_timer[0]) /
sizeof(motor_off_timer[0]);
decusecount = drive & 0x40000000;
#endif
calledfromint = drive & 0x80000000;
drive&=3;
if (calledfromint && !try_fdc(drive)) {
if (!try_fdc(drive)) {
/* We would be blocked in an interrupt, so try again later */
motor_off_timer[drive].expires = jiffies + 1;
add_timer(motor_off_timer + drive);
timer->expires = jiffies + 1;
add_timer(timer);
return;
}
unit[drive].motor = 0;
......@@ -392,8 +390,6 @@ static void floppy_off (unsigned int nr)
int drive;
drive = nr & 3;
/* called this way it is always from interrupt */
motor_off_timer[drive].data = nr | 0x80000000;
mod_timer(motor_off_timer + drive, jiffies + 3*HZ);
}
......@@ -435,7 +431,7 @@ static int fd_calibrate(int drive)
break;
if (--n == 0) {
printk (KERN_ERR "fd%d: calibrate failed, turning motor off\n", drive);
fd_motor_off (drive);
fd_motor_off (motor_off_timer + drive);
unit[drive].track = -1;
rel_fdc();
return 0;
......@@ -564,7 +560,7 @@ static irqreturn_t fd_block_done(int irq, void *dummy)
if (block_flag == 2) { /* writing */
writepending = 2;
post_write_timer.expires = jiffies + 1; /* at least 2 ms */
post_write_timer.data = selected;
post_write_timer_drive = selected;
add_timer(&post_write_timer);
}
else { /* reading */
......@@ -651,6 +647,10 @@ static void post_write (unsigned long drive)
rel_fdc(); /* corresponds to get_fdc() in raw_write */
}
static void post_write_callback(struct timer_list *timer)
{
post_write(post_write_timer_drive);
}
/*
* The following functions are to convert the block contents into raw data
......@@ -1244,8 +1244,12 @@ static void dos_write(int disk)
/* FIXME: this assumes the drive is still spinning -
* which is only true if we complete writing a track within three seconds
*/
static void flush_track_callback(unsigned long nr)
static void flush_track_callback(struct timer_list *timer)
{
unsigned long nr = ((unsigned long)timer -
(unsigned long)&flush_track_timer[0]) /
sizeof(flush_track_timer[0]);
nr&=3;
writefromint = 1;
if (!try_fdc(nr)) {
......@@ -1649,8 +1653,7 @@ static void floppy_release(struct gendisk *disk, fmode_t mode)
fd_ref[drive] = 0;
}
#ifdef MODULE
/* the mod_use counter is handled this way */
floppy_off (drive | 0x40000000);
floppy_off (drive);
#endif
mutex_unlock(&amiflop_mutex);
}
......@@ -1791,27 +1794,19 @@ static int __init amiga_floppy_probe(struct platform_device *pdev)
floppy_find, NULL, NULL);
/* initialize variables */
init_timer(&motor_on_timer);
timer_setup(&motor_on_timer, motor_on_callback, 0);
motor_on_timer.expires = 0;
motor_on_timer.data = 0;
motor_on_timer.function = motor_on_callback;
for (i = 0; i < FD_MAX_UNITS; i++) {
init_timer(&motor_off_timer[i]);
timer_setup(&motor_off_timer[i], fd_motor_off, 0);
motor_off_timer[i].expires = 0;
motor_off_timer[i].data = i|0x80000000;
motor_off_timer[i].function = fd_motor_off;
init_timer(&flush_track_timer[i]);
timer_setup(&flush_track_timer[i], flush_track_callback, 0);
flush_track_timer[i].expires = 0;
flush_track_timer[i].data = i;
flush_track_timer[i].function = flush_track_callback;
unit[i].track = -1;
}
init_timer(&post_write_timer);
timer_setup(&post_write_timer, post_write_callback, 0);
post_write_timer.expires = 0;
post_write_timer.data = 0;
post_write_timer.function = post_write;
for (i = 0; i < 128; i++)
mfmdecode[i]=255;
......
......@@ -744,7 +744,7 @@ count_targets(struct aoedev *d, int *untainted)
}
static void
rexmit_timer(ulong vp)
rexmit_timer(struct timer_list *timer)
{
struct aoedev *d;
struct aoetgt *t;
......@@ -758,7 +758,7 @@ rexmit_timer(ulong vp)
int utgts; /* number of aoetgt descriptors (not slots) */
int since;
d = (struct aoedev *) vp;
d = from_timer(d, timer, timer);
spin_lock_irqsave(&d->lock, flags);
......@@ -1429,7 +1429,7 @@ aoecmd_ata_id(struct aoedev *d)
d->rttavg = RTTAVG_INIT;
d->rttdev = RTTDEV_INIT;
d->timer.function = rexmit_timer;
d->timer.function = (TIMER_FUNC_TYPE)rexmit_timer;
skb = skb_clone(skb, GFP_ATOMIC);
if (skb) {
......
......@@ -15,7 +15,6 @@
#include <linux/string.h>
#include "aoe.h"
static void dummy_timer(ulong);
static void freetgt(struct aoedev *d, struct aoetgt *t);
static void skbpoolfree(struct aoedev *d);
......@@ -146,11 +145,11 @@ aoedev_put(struct aoedev *d)
}
static void
dummy_timer(ulong vp)
dummy_timer(struct timer_list *t)
{
struct aoedev *d;
d = (struct aoedev *)vp;
d = from_timer(d, t, timer);
if (d->flags & DEVFL_TKILL)
return;
d->timer.expires = jiffies + HZ;
......@@ -466,9 +465,7 @@ aoedev_by_aoeaddr(ulong maj, int min, int do_alloc)
INIT_WORK(&d->work, aoecmd_sleepwork);
spin_lock_init(&d->lock);
skb_queue_head_init(&d->skbpool);
init_timer(&d->timer);
d->timer.data = (ulong) d;
d->timer.function = dummy_timer;
timer_setup(&d->timer, dummy_timer, 0);
d->timer.expires = jiffies + HZ;
add_timer(&d->timer);
d->bufpool = NULL; /* defer to aoeblk_gdalloc */
......
......@@ -903,10 +903,14 @@ static void unlock_fdc(void)
}
/* switches the motor off after a given timeout */
static void motor_off_callback(unsigned long nr)
static void motor_off_callback(struct timer_list *t)
{
unsigned long nr = t - motor_off_timer;
unsigned char mask = ~(0x10 << UNIT(nr));
if (WARN_ON_ONCE(nr >= N_DRIVE))
return;
set_dor(FDC(nr), mask, 0);
}
......@@ -3047,7 +3051,7 @@ static void raw_cmd_done(int flag)
else
raw_cmd->flags &= ~FD_RAW_DISK_CHANGE;
if (raw_cmd->flags & FD_RAW_NO_MOTOR_AFTER)
motor_off_callback(current_drive);
motor_off_callback(&motor_off_timer[current_drive]);
if (raw_cmd->next &&
(!(raw_cmd->flags & FD_RAW_FAILURE) ||
......@@ -4542,7 +4546,7 @@ static int __init do_floppy_init(void)
disks[drive]->fops = &floppy_fops;
sprintf(disks[drive]->disk_name, "fd%d", drive);
setup_timer(&motor_off_timer[drive], motor_off_callback, drive);
timer_setup(&motor_off_timer[drive], motor_off_callback, 0);
}
err = register_blkdev(FLOPPY_MAJOR, "fd");
......
......@@ -239,10 +239,10 @@ static unsigned short write_postamble[] = {
static void seek_track(struct floppy_state *fs, int n);
static void init_dma(struct dbdma_cmd *cp, int cmd, void *buf, int count);
static void act(struct floppy_state *fs);
static void scan_timeout(unsigned long data);
static void seek_timeout(unsigned long data);
static void settle_timeout(unsigned long data);
static void xfer_timeout(unsigned long data);
static void scan_timeout(struct timer_list *t);
static void seek_timeout(struct timer_list *t);
static void settle_timeout(struct timer_list *t);
static void xfer_timeout(struct timer_list *t);
static irqreturn_t swim3_interrupt(int irq, void *dev_id);
/*static void fd_dma_interrupt(int irq, void *dev_id);*/
static int grab_drive(struct floppy_state *fs, enum swim_state state,
......@@ -392,13 +392,12 @@ static void do_fd_request(struct request_queue * q)
}
static void set_timeout(struct floppy_state *fs, int nticks,
void (*proc)(unsigned long))
void (*proc)(struct timer_list *t))
{
if (fs->timeout_pending)
del_timer(&fs->timeout);
fs->timeout.expires = jiffies + nticks;
fs->timeout.function = proc;
fs->timeout.data = (unsigned long) fs;
fs->timeout.function = (TIMER_FUNC_TYPE)proc;
add_timer(&fs->timeout);
fs->timeout_pending = 1;
}
......@@ -569,9 +568,9 @@ static void act(struct floppy_state *fs)
}
}
static void scan_timeout(unsigned long data)
static void scan_timeout(struct timer_list *t)
{
struct floppy_state *fs = (struct floppy_state *) data;
struct floppy_state *fs = from_timer(fs, t, timeout);
struct swim3 __iomem *sw = fs->swim3;
unsigned long flags;
......@@ -594,9 +593,9 @@ static void scan_timeout(unsigned long data)
spin_unlock_irqrestore(&swim3_lock, flags);
}
static void seek_timeout(unsigned long data)
static void seek_timeout(struct timer_list *t)
{
struct floppy_state *fs = (struct floppy_state *) data;
struct floppy_state *fs = from_timer(fs, t, timeout);
struct swim3 __iomem *sw = fs->swim3;
unsigned long flags;
......@@ -614,9 +613,9 @@ static void seek_timeout(unsigned long data)
spin_unlock_irqrestore(&swim3_lock, flags);
}
static void settle_timeout(unsigned long data)
static void settle_timeout(struct timer_list *t)
{
struct floppy_state *fs = (struct floppy_state *) data;
struct floppy_state *fs = from_timer(fs, t, timeout);
struct swim3 __iomem *sw = fs->swim3;
unsigned long flags;
......@@ -644,9 +643,9 @@ static void settle_timeout(unsigned long data)
spin_unlock_irqrestore(&swim3_lock, flags);
}
static void xfer_timeout(unsigned long data)
static void xfer_timeout(struct timer_list *t)
{
struct floppy_state *fs = (struct floppy_state *) data;
struct floppy_state *fs = from_timer(fs, t, timeout);
struct swim3 __iomem *sw = fs->swim3;
struct dbdma_regs __iomem *dr = fs->dma;
unsigned long flags;
......@@ -1182,7 +1181,7 @@ static int swim3_add_device(struct macio_dev *mdev, int index)
return -EBUSY;
}
init_timer(&fs->timeout);
timer_setup(&fs->timeout, NULL, 0);
swim3_info("SWIM3 floppy controller %s\n",
mdev->media_bay ? "in media bay" : "");
......
......@@ -1333,8 +1333,7 @@ static int ide_cdrom_prep_fs(struct request_queue *q, struct request *rq)
unsigned long blocks = blk_rq_sectors(rq) / (hard_sect >> 9);
struct scsi_request *req = scsi_req(rq);
scsi_req_init(req);
memset(req->cmd, 0, BLK_MAX_CDB);
q->initialize_rq_fn(rq);
if (rq_data_dir(rq) == READ)
req->cmd[0] = GPCMD_READ_10;
......
......@@ -147,9 +147,9 @@ static void scale_stats(struct cache_stats *stats, unsigned long rescale_at)
}
}
static void scale_accounting(unsigned long data)
static void scale_accounting(struct timer_list *t)
{
struct cache_accounting *acc = (struct cache_accounting *) data;
struct cache_accounting *acc = from_timer(acc, t, timer);
#define move_stat(name) do { \
unsigned t = atomic_xchg(&acc->collector.name, 0); \
......@@ -234,9 +234,7 @@ void bch_cache_accounting_init(struct cache_accounting *acc,
kobject_init(&acc->day.kobj, &bch_stats_ktype);
closure_init(&acc->cl, parent);
init_timer(&acc->timer);
timer_setup(&acc->timer, scale_accounting, 0);
acc->timer.expires = jiffies + accounting_delay;
acc->timer.data = (unsigned long) acc;
acc->timer.function = scale_accounting;
add_timer(&acc->timer);
}
......@@ -44,9 +44,9 @@ struct dm_delay_info {
static DEFINE_MUTEX(delayed_bios_lock);
static void handle_delayed_timer(unsigned long data)
static void handle_delayed_timer(struct timer_list *t)
{
struct delay_c *dc = (struct delay_c *)data;
struct delay_c *dc = from_timer(dc, t, delay_timer);
queue_work(dc->kdelayd_wq, &dc->flush_expired_bios);
}
......@@ -195,7 +195,7 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)
goto bad_queue;
}
setup_timer(&dc->delay_timer, handle_delayed_timer, (unsigned long)dc);
timer_setup(&dc->delay_timer, handle_delayed_timer, 0);
INIT_WORK(&dc->flush_expired_bios, flush_expired_bios);
INIT_LIST_HEAD(&dc->delayed_bios);
......
......@@ -1094,9 +1094,9 @@ static void sleep_on_endio_wait(struct dm_integrity_c *ic)
__remove_wait_queue(&ic->endio_wait, &wait);
}
static void autocommit_fn(unsigned long data)
static void autocommit_fn(struct timer_list *t)
{
struct dm_integrity_c *ic = (struct dm_integrity_c *)data;
struct dm_integrity_c *ic = from_timer(ic, t, autocommit_timer);
if (likely(!dm_integrity_failed(ic)))
queue_work(ic->commit_wq, &ic->commit_work);
......@@ -2942,7 +2942,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
ic->autocommit_jiffies = msecs_to_jiffies(sync_msec);
ic->autocommit_msec = sync_msec;
setup_timer(&ic->autocommit_timer, autocommit_fn, (unsigned long)ic);
timer_setup(&ic->autocommit_timer, autocommit_fn, 0);
ic->io = dm_io_client_create();
if (IS_ERR(ic->io)) {
......
......@@ -94,9 +94,9 @@ static void wakeup_mirrord(void *context)
queue_work(ms->kmirrord_wq, &ms->kmirrord_work);
}
static void delayed_wake_fn(unsigned long data)
static void delayed_wake_fn(struct timer_list *t)
{
struct mirror_set *ms = (struct mirror_set *) data;
struct mirror_set *ms = from_timer(ms, t, timer);
clear_bit(0, &ms->timer_pending);
wakeup_mirrord(ms);
......@@ -108,8 +108,6 @@ static void delayed_wake(struct mirror_set *ms)
return;
ms->timer.expires = jiffies + HZ / 5;
ms->timer.data = (unsigned long) ms;
ms->timer.function = delayed_wake_fn;
add_timer(&ms->timer);
}
......@@ -1133,7 +1131,7 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv)
goto err_free_context;
}
INIT_WORK(&ms->kmirrord_work, do_mirror);
init_timer(&ms->timer);
timer_setup(&ms->timer, delayed_wake_fn, 0);
ms->timer_pending = 0;
INIT_WORK(&ms->trigger_event, trigger_event);
......
......@@ -541,7 +541,7 @@ static void mddev_put(struct mddev *mddev)
bioset_free(sync_bs);
}
static void md_safemode_timeout(unsigned long data);
static void md_safemode_timeout(struct timer_list *t);
void mddev_init(struct mddev *mddev)
{
......@@ -550,8 +550,7 @@ void mddev_init(struct mddev *mddev)
mutex_init(&mddev->bitmap_info.mutex);
INIT_LIST_HEAD(&mddev->disks);
INIT_LIST_HEAD(&mddev->all_mddevs);
setup_timer(&mddev->safemode_timer, md_safemode_timeout,
(unsigned long) mddev);
timer_setup(&mddev->safemode_timer, md_safemode_timeout, 0);
atomic_set(&mddev->active, 1);
atomic_set(&mddev->openers, 0);
atomic_set(&mddev->active_io, 0);
......@@ -5404,9 +5403,9 @@ static int add_named_array(const char *val, const struct kernel_param *kp)
return -EINVAL;
}
static void md_safemode_timeout(unsigned long data)
static void md_safemode_timeout(struct timer_list *t)
{
struct mddev *mddev = (struct mddev *) data;
struct mddev *mddev = from_timer(mddev, t, safemode_timer);
mddev->safemode = 1;
if (mddev->external)
......
......@@ -2144,6 +2144,7 @@ nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport,
struct nvmet_fc_fcp_iod *fod)
{
struct nvme_fc_cmd_iu *cmdiu = &fod->cmdiubuf;
u32 xfrlen = be32_to_cpu(cmdiu->data_len);
int ret;
/*
......@@ -2157,7 +2158,6 @@ nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport,
fod->fcpreq->done = nvmet_fc_xmt_fcp_op_done;
fod->req.transfer_len = be32_to_cpu(cmdiu->data_len);
if (cmdiu->flags & FCNVME_CMD_FLAGS_WRITE) {
fod->io_dir = NVMET_FCP_WRITE;
if (!nvme_is_write(&cmdiu->sqe))
......@@ -2168,7 +2168,7 @@ nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport,
goto transport_error;
} else {
fod->io_dir = NVMET_FCP_NODATA;
if (fod->req.transfer_len)
if (xfrlen)
goto transport_error;
}
......@@ -2192,6 +2192,8 @@ nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport,
return;
}
fod->req.transfer_len = xfrlen;
/* keep a running counter of tail position */
atomic_inc(&fod->queue->sqtail);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment