Commit 20e4d813 authored by Christoph Hellwig's avatar Christoph Hellwig Committed by Jens Axboe

blk-mq: simplify queue mapping & schedule with each possisble CPU

The previous patch assigns interrupt vectors to all possible CPUs, so
now hctx can be mapped to possible CPUs, this patch applies this fact
to simplify queue mapping & schedule so that we don't need to handle
CPU hotplug for dealing with physical CPU plug & unplug. With this
simplication, we can work well on physical CPU plug & unplug, which
is a normal use case for VM at least.

Make sure we allocate blk_mq_ctx structures for all possible CPUs, and
set hctx->numa_node for possible CPUs which are mapped to this hctx. And
only choose the online CPUs for schedule.
Reported-by: default avatarChristian Borntraeger <borntraeger@de.ibm.com>
Tested-by: default avatarChristian Borntraeger <borntraeger@de.ibm.com>
Tested-by: default avatarStefan Haberland <sth@linux.vnet.ibm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: default avatarChristoph Hellwig <hch@lst.de>
Fixes: 4b855ad3 ("blk-mq: Create hctx for each present CPU")
(merged the three into one because any single one may not work, and fix
selecting online CPUs for scheduler)
Signed-off-by: default avatarMing Lei <ming.lei@redhat.com>
Signed-off-by: default avatarJens Axboe <axboe@kernel.dk>
parent 84676c1f
......@@ -440,7 +440,7 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
blk_queue_exit(q);
return ERR_PTR(-EXDEV);
}
cpu = cpumask_first(alloc_data.hctx->cpumask);
cpu = cpumask_first_and(alloc_data.hctx->cpumask, cpu_online_mask);
alloc_data.ctx = __blk_mq_get_ctx(q, cpu);
rq = blk_mq_get_request(q, NULL, op, &alloc_data);
......@@ -1324,9 +1324,10 @@ static int blk_mq_hctx_next_cpu(struct blk_mq_hw_ctx *hctx)
if (--hctx->next_cpu_batch <= 0) {
int next_cpu;
next_cpu = cpumask_next(hctx->next_cpu, hctx->cpumask);
next_cpu = cpumask_next_and(hctx->next_cpu, hctx->cpumask,
cpu_online_mask);
if (next_cpu >= nr_cpu_ids)
next_cpu = cpumask_first(hctx->cpumask);
next_cpu = cpumask_first_and(hctx->cpumask,cpu_online_mask);
hctx->next_cpu = next_cpu;
hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH;
......@@ -2220,16 +2221,11 @@ static void blk_mq_init_cpu_queues(struct request_queue *q,
INIT_LIST_HEAD(&__ctx->rq_list);
__ctx->queue = q;
/* If the cpu isn't present, the cpu is mapped to first hctx */
if (!cpu_present(i))
continue;
hctx = blk_mq_map_queue(q, i);
/*
* Set local node, IFF we have more than one hw queue. If
* not, we remain on the home node of the device
*/
hctx = blk_mq_map_queue(q, i);
if (nr_hw_queues > 1 && hctx->numa_node == NUMA_NO_NODE)
hctx->numa_node = local_memory_node(cpu_to_node(i));
}
......@@ -2286,7 +2282,7 @@ static void blk_mq_map_swqueue(struct request_queue *q)
*
* If the cpu isn't present, the cpu is mapped to first hctx.
*/
for_each_present_cpu(i) {
for_each_possible_cpu(i) {
hctx_idx = q->mq_map[i];
/* unmapped hw queue can be remapped after CPU topo changed */
if (!set->tags[hctx_idx] &&
......@@ -2340,7 +2336,8 @@ static void blk_mq_map_swqueue(struct request_queue *q)
/*
* Initialize batch roundrobin counts
*/
hctx->next_cpu = cpumask_first(hctx->cpumask);
hctx->next_cpu = cpumask_first_and(hctx->cpumask,
cpu_online_mask);
hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH;
}
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment