Commit 27728bf0 authored by Linus Torvalds

Merge branch 'for-linus' of git://git.kernel.dk/linux-block

Pull block fixes from Jens Axboe:
 "Another week, another round of fixes.

  These have been brewing for a bit and in various iterations, but I
  feel pretty comfortable about the quality of them.  They fix real
  issues.  The pull request is mostly blk-mq related, and the only one
  not fixing a real bug is the tag iterator abstraction from Christoph.
  But it's pretty trivial, and we'll need it for another fix soon.

  Apart from the blk-mq fixes, there's an NVMe affinity fix from Keith,
  and a single fix for xen-blkback from Roger fixing failure to free
  requests on disconnect"

* 'for-linus' of git://git.kernel.dk/linux-block:
  blk-mq: factor out a helper to iterate all tags for a request_queue
  blk-mq: fix racy updates of rq->errors
  blk-mq: fix deadlock when reading cpu_list
  blk-mq: avoid inserting requests before establishing new mapping
  blk-mq: fix q->mq_usage_counter access race
  blk-mq: Fix use after of free q->mq_map
  blk-mq: fix sysfs registration/unregistration race
  blk-mq: avoid setting hctx->tags->cpumask before allocation
  NVMe: Set affinity after allocating request queues
  xen/blkback: free requests on disconnection
parents 36f8dafe 0bf6cd5b
@@ -31,7 +31,8 @@ static int get_first_sibling(unsigned int cpu)
 	return cpu;
 }
 
-int blk_mq_update_queue_map(unsigned int *map, unsigned int nr_queues)
+int blk_mq_update_queue_map(unsigned int *map, unsigned int nr_queues,
+			    const struct cpumask *online_mask)
 {
 	unsigned int i, nr_cpus, nr_uniq_cpus, queue, first_sibling;
 	cpumask_var_t cpus;
@@ -41,7 +42,7 @@ int blk_mq_update_queue_map(unsigned int *map, unsigned int nr_queues)
 
 	cpumask_clear(cpus);
 	nr_cpus = nr_uniq_cpus = 0;
-	for_each_online_cpu(i) {
+	for_each_cpu(i, online_mask) {
 		nr_cpus++;
 		first_sibling = get_first_sibling(i);
 		if (!cpumask_test_cpu(first_sibling, cpus))
@@ -51,7 +52,7 @@ int blk_mq_update_queue_map(unsigned int *map, unsigned int nr_queues)
 
 	queue = 0;
 	for_each_possible_cpu(i) {
-		if (!cpu_online(i)) {
+		if (!cpumask_test_cpu(i, online_mask)) {
 			map[i] = 0;
 			continue;
 		}
@@ -95,7 +96,7 @@ unsigned int *blk_mq_make_queue_map(struct blk_mq_tag_set *set)
 	if (!map)
 		return NULL;
 
-	if (!blk_mq_update_queue_map(map, set->nr_hw_queues))
+	if (!blk_mq_update_queue_map(map, set->nr_hw_queues, cpu_online_mask))
 		return map;
 
 	kfree(map);
...
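For illustration only, here is a small userspace model of what the new online_mask parameter buys the callers above: the caller, not the mapper, decides which CPUs count as online (for instance a mask that already contains a CPU that is only about to come up), and every CPU outside the mask is parked on queue 0. The helper name and the plain round-robin spread below are invented for this sketch; the kernel routine additionally folds thread siblings onto the same hardware queue.

/*
 * Simplified model of mapping CPUs to hardware queues against an
 * explicit "online" mask supplied by the caller.  Not kernel code.
 */
#include <stdio.h>

#define NR_CPUS 8

static void toy_update_queue_map(unsigned int *map, unsigned int nr_queues,
				 const unsigned long *online_mask)
{
	unsigned int cpu, queue = 0;

	for (cpu = 0; cpu < NR_CPUS; cpu++) {
		if (!(*online_mask & (1UL << cpu))) {
			map[cpu] = 0;	/* CPUs outside the mask fall back to queue 0 */
			continue;
		}
		map[cpu] = queue++ % nr_queues;	/* spread masked-in CPUs round-robin */
	}
}

int main(void)
{
	unsigned int map[NR_CPUS];
	unsigned long online = 0x2f;	/* CPUs 0-3 and 5 treated as online */
	unsigned int cpu;

	toy_update_queue_map(map, 2, &online);
	for (cpu = 0; cpu < NR_CPUS; cpu++)
		printf("cpu%u -> hw queue %u\n", cpu, map[cpu]);
	return 0;
}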
@@ -229,8 +229,6 @@ static ssize_t blk_mq_hw_sysfs_cpus_show(struct blk_mq_hw_ctx *hctx, char *page)
 	unsigned int i, first = 1;
 	ssize_t ret = 0;
 
-	blk_mq_disable_hotplug();
-
 	for_each_cpu(i, hctx->cpumask) {
 		if (first)
 			ret += sprintf(ret + page, "%u", i);
@@ -240,8 +238,6 @@ static ssize_t blk_mq_hw_sysfs_cpus_show(struct blk_mq_hw_ctx *hctx, char *page)
 		first = 0;
 	}
 
-	blk_mq_enable_hotplug();
-
 	ret += sprintf(ret + page, "\n");
 	return ret;
 }
@@ -343,7 +339,7 @@ static void blk_mq_unregister_hctx(struct blk_mq_hw_ctx *hctx)
 	struct blk_mq_ctx *ctx;
 	int i;
 
-	if (!hctx->nr_ctx || !(hctx->flags & BLK_MQ_F_SYSFS_UP))
+	if (!hctx->nr_ctx)
 		return;
 
 	hctx_for_each_ctx(hctx, ctx, i)
@@ -358,7 +354,7 @@ static int blk_mq_register_hctx(struct blk_mq_hw_ctx *hctx)
 	struct blk_mq_ctx *ctx;
 	int i, ret;
 
-	if (!hctx->nr_ctx || !(hctx->flags & BLK_MQ_F_SYSFS_UP))
+	if (!hctx->nr_ctx)
 		return 0;
 
 	ret = kobject_add(&hctx->kobj, &q->mq_kobj, "%u", hctx->queue_num);
@@ -381,6 +377,8 @@ void blk_mq_unregister_disk(struct gendisk *disk)
 	struct blk_mq_ctx *ctx;
 	int i, j;
 
+	blk_mq_disable_hotplug();
+
 	queue_for_each_hw_ctx(q, hctx, i) {
 		blk_mq_unregister_hctx(hctx);
@@ -395,6 +393,9 @@ void blk_mq_unregister_disk(struct gendisk *disk)
 	kobject_put(&q->mq_kobj);
 	kobject_put(&disk_to_dev(disk)->kobj);
+
+	q->mq_sysfs_init_done = false;
+	blk_mq_enable_hotplug();
 }
 
 static void blk_mq_sysfs_init(struct request_queue *q)
@@ -425,27 +426,30 @@ int blk_mq_register_disk(struct gendisk *disk)
 	struct blk_mq_hw_ctx *hctx;
 	int ret, i;
 
+	blk_mq_disable_hotplug();
+
 	blk_mq_sysfs_init(q);
 
 	ret = kobject_add(&q->mq_kobj, kobject_get(&dev->kobj), "%s", "mq");
 	if (ret < 0)
-		return ret;
+		goto out;
 
 	kobject_uevent(&q->mq_kobj, KOBJ_ADD);
 
 	queue_for_each_hw_ctx(q, hctx, i) {
-		hctx->flags |= BLK_MQ_F_SYSFS_UP;
 		ret = blk_mq_register_hctx(hctx);
 		if (ret)
 			break;
 	}
 
-	if (ret) {
+	if (ret)
 		blk_mq_unregister_disk(disk);
-		return ret;
-	}
+	else
+		q->mq_sysfs_init_done = true;
+out:
+	blk_mq_enable_hotplug();
 
-	return 0;
+	return ret;
 }
 EXPORT_SYMBOL_GPL(blk_mq_register_disk);
@@ -454,6 +458,9 @@ void blk_mq_sysfs_unregister(struct request_queue *q)
 	struct blk_mq_hw_ctx *hctx;
 	int i;
 
+	if (!q->mq_sysfs_init_done)
+		return;
+
 	queue_for_each_hw_ctx(q, hctx, i)
 		blk_mq_unregister_hctx(hctx);
 }
@@ -463,6 +470,9 @@ int blk_mq_sysfs_register(struct request_queue *q)
 	struct blk_mq_hw_ctx *hctx;
 	int i, ret = 0;
 
+	if (!q->mq_sysfs_init_done)
+		return ret;
+
 	queue_for_each_hw_ctx(q, hctx, i) {
 		ret = blk_mq_register_hctx(hctx);
 		if (ret)
...
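For illustration, a toy model of the registration race the hunks above close: sysfs re-registration triggered from the CPU hotplug path must not run before, or concurrently with, the initial blk_mq_register_disk(). The sketch below is not kernel code; a plain pthread mutex stands in for the hotplug lock and the boolean plays the role of q->mq_sysfs_init_done.

/*
 * Userspace sketch of the "init done" guard, with invented names.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t hotplug_lock = PTHREAD_MUTEX_INITIALIZER;
static bool sysfs_init_done;

static void register_disk(void)
{
	pthread_mutex_lock(&hotplug_lock);	/* blk_mq_disable_hotplug() analogue */
	/* ...create the sysfs objects here... */
	sysfs_init_done = true;
	pthread_mutex_unlock(&hotplug_lock);	/* blk_mq_enable_hotplug() analogue */
}

static void sysfs_reregister(void)		/* called from the hotplug path */
{
	pthread_mutex_lock(&hotplug_lock);
	if (!sysfs_init_done) {			/* nothing registered yet: skip */
		pthread_mutex_unlock(&hotplug_lock);
		return;
	}
	/* ...tear down and re-create per-hctx entries here... */
	pthread_mutex_unlock(&hotplug_lock);
}

int main(void)
{
	sysfs_reregister();	/* safe no-op before registration */
	register_disk();
	sysfs_reregister();	/* now actually re-registers */
	printf("init done: %d\n", sysfs_init_done);
	return 0;
}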
@@ -471,17 +471,30 @@ void blk_mq_all_tag_busy_iter(struct blk_mq_tags *tags, busy_tag_iter_fn *fn,
 }
 EXPORT_SYMBOL(blk_mq_all_tag_busy_iter);
 
-void blk_mq_tag_busy_iter(struct blk_mq_hw_ctx *hctx, busy_iter_fn *fn,
+void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn,
 		void *priv)
 {
+	struct blk_mq_hw_ctx *hctx;
+	int i;
+
+	queue_for_each_hw_ctx(q, hctx, i) {
 		struct blk_mq_tags *tags = hctx->tags;
 
+		/*
+		 * If not software queues are currently mapped to this
+		 * hardware queue, there's nothing to check
+		 */
+		if (!blk_mq_hw_queue_mapped(hctx))
+			continue;
+
 		if (tags->nr_reserved_tags)
 			bt_for_each(hctx, &tags->breserved_tags, 0, fn, priv, true);
 		bt_for_each(hctx, &tags->bitmap_tags, tags->nr_reserved_tags, fn, priv,
 			false);
+	}
 }
-EXPORT_SYMBOL(blk_mq_tag_busy_iter);
 
 static unsigned int bt_unused_tags(struct blk_mq_bitmap_tags *bt)
 {
...
@@ -58,6 +58,8 @@ extern ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page);
 extern void blk_mq_tag_init_last_tag(struct blk_mq_tags *tags, unsigned int *last_tag);
 extern int blk_mq_tag_update_depth(struct blk_mq_tags *tags, unsigned int depth);
 extern void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool);
+void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn,
+		void *priv);
 
 enum {
 	BLK_MQ_TAG_CACHE_MIN	= 1,
...
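A hedged sketch of how a caller inside block/ might use the new queue-wide iterator; this is not code from the series, and the function names are invented. It assumes the busy_iter_fn typedef that blk-mq.h already has in this kernel (callback arguments hctx, rq, priv, reserved); the helper invokes the callback for every busy tag on every hardware queue of the request_queue, hiding the queue_for_each_hw_ctx()/blk_mq_hw_queue_mapped() boilerplate that blk_mq_rq_timer() used to open-code.

/* Illustrative kernel-context sketch, assuming a file under block/. */
#include <linux/blk-mq.h>
#include "blk-mq.h"
#include "blk-mq-tag.h"

/* Runs once per busy tag; signature follows the busy_iter_fn typedef. */
static void count_inflight(struct blk_mq_hw_ctx *hctx, struct request *rq,
			   void *priv, bool reserved)
{
	unsigned int *inflight = priv;

	(*inflight)++;
}

/* Count requests that currently hold a tag on any hardware queue of @q. */
static unsigned int queue_inflight(struct request_queue *q)
{
	unsigned int inflight = 0;

	blk_mq_queue_tag_busy_iter(q, count_inflight, &inflight);
	return inflight;
}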
@@ -393,14 +393,16 @@ void __blk_mq_complete_request(struct request *rq)
  *	Ends all I/O on a request. It does not handle partial completions.
  *	The actual completion happens out-of-order, through a IPI handler.
  **/
-void blk_mq_complete_request(struct request *rq)
+void blk_mq_complete_request(struct request *rq, int error)
 {
 	struct request_queue *q = rq->q;
 
 	if (unlikely(blk_should_fake_timeout(q)))
 		return;
-	if (!blk_mark_rq_complete(rq))
+	if (!blk_mark_rq_complete(rq)) {
+		rq->errors = error;
 		__blk_mq_complete_request(rq);
+	}
 }
 EXPORT_SYMBOL(blk_mq_complete_request);
 
@@ -616,10 +618,8 @@ static void blk_mq_check_expired(struct blk_mq_hw_ctx *hctx,
 		 * If a request wasn't started before the queue was
 		 * marked dying, kill it here or it'll go unnoticed.
 		 */
-		if (unlikely(blk_queue_dying(rq->q))) {
-			rq->errors = -EIO;
-			blk_mq_complete_request(rq);
-		}
+		if (unlikely(blk_queue_dying(rq->q)))
+			blk_mq_complete_request(rq, -EIO);
 		return;
 	}
 	if (rq->cmd_flags & REQ_NO_TIMEOUT)
@@ -641,24 +641,16 @@ static void blk_mq_rq_timer(unsigned long priv)
 		.next		= 0,
 		.next_set	= 0,
 	};
-	struct blk_mq_hw_ctx *hctx;
 	int i;
 
-	queue_for_each_hw_ctx(q, hctx, i) {
-		/*
-		 * If not software queues are currently mapped to this
-		 * hardware queue, there's nothing to check
-		 */
-		if (!blk_mq_hw_queue_mapped(hctx))
-			continue;
-
-		blk_mq_tag_busy_iter(hctx, blk_mq_check_expired, &data);
-	}
+	blk_mq_queue_tag_busy_iter(q, blk_mq_check_expired, &data);
 
 	if (data.next_set) {
 		data.next = blk_rq_timeout(round_jiffies_up(data.next));
 		mod_timer(&q->timeout, data.next);
 	} else {
+		struct blk_mq_hw_ctx *hctx;
+
 		queue_for_each_hw_ctx(q, hctx, i) {
 			/* the hctx may be unmapped, so check it here */
 			if (blk_mq_hw_queue_mapped(hctx))
@@ -1789,13 +1781,19 @@ static void blk_mq_init_cpu_queues(struct request_queue *q,
 	}
 }
 
-static void blk_mq_map_swqueue(struct request_queue *q)
+static void blk_mq_map_swqueue(struct request_queue *q,
+			       const struct cpumask *online_mask)
 {
 	unsigned int i;
 	struct blk_mq_hw_ctx *hctx;
 	struct blk_mq_ctx *ctx;
 	struct blk_mq_tag_set *set = q->tag_set;
 
+	/*
+	 * Avoid others reading imcomplete hctx->cpumask through sysfs
+	 */
+	mutex_lock(&q->sysfs_lock);
+
 	queue_for_each_hw_ctx(q, hctx, i) {
 		cpumask_clear(hctx->cpumask);
 		hctx->nr_ctx = 0;
@@ -1806,16 +1804,17 @@ static void blk_mq_map_swqueue(struct request_queue *q)
 	 */
 	queue_for_each_ctx(q, ctx, i) {
 		/* If the cpu isn't online, the cpu is mapped to first hctx */
-		if (!cpu_online(i))
+		if (!cpumask_test_cpu(i, online_mask))
 			continue;
 
 		hctx = q->mq_ops->map_queue(q, i);
 		cpumask_set_cpu(i, hctx->cpumask);
-		cpumask_set_cpu(i, hctx->tags->cpumask);
 		ctx->index_hw = hctx->nr_ctx;
 		hctx->ctxs[hctx->nr_ctx++] = ctx;
 	}
 
+	mutex_unlock(&q->sysfs_lock);
+
 	queue_for_each_hw_ctx(q, hctx, i) {
 		struct blk_mq_ctxmap *map = &hctx->ctx_map;
 
@@ -1851,6 +1850,14 @@ static void blk_mq_map_swqueue(struct request_queue *q)
 		hctx->next_cpu = cpumask_first(hctx->cpumask);
 		hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH;
 	}
+
+	queue_for_each_ctx(q, ctx, i) {
+		if (!cpumask_test_cpu(i, online_mask))
+			continue;
+
+		hctx = q->mq_ops->map_queue(q, i);
+		cpumask_set_cpu(i, hctx->tags->cpumask);
+	}
 }
 
 static void blk_mq_update_tag_set_depth(struct blk_mq_tag_set *set)
@@ -1918,6 +1925,9 @@ void blk_mq_release(struct request_queue *q)
 		kfree(hctx);
 	}
 
+	kfree(q->mq_map);
+	q->mq_map = NULL;
+
 	kfree(q->queue_hw_ctx);
 
 	/* ctx kobj stays in queue_ctx */
@@ -2027,13 +2037,15 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
 	if (blk_mq_init_hw_queues(q, set))
 		goto err_hctxs;
 
+	get_online_cpus();
 	mutex_lock(&all_q_mutex);
-	list_add_tail(&q->all_q_node, &all_q_list);
-	mutex_unlock(&all_q_mutex);
 
+	list_add_tail(&q->all_q_node, &all_q_list);
 	blk_mq_add_queue_tag_set(set, q);
+	blk_mq_map_swqueue(q, cpu_online_mask);
 
-	blk_mq_map_swqueue(q);
+	mutex_unlock(&all_q_mutex);
+	put_online_cpus();
 
 	return q;
 
@@ -2057,30 +2069,27 @@ void blk_mq_free_queue(struct request_queue *q)
 {
 	struct blk_mq_tag_set *set = q->tag_set;
 
+	mutex_lock(&all_q_mutex);
+	list_del_init(&q->all_q_node);
+	mutex_unlock(&all_q_mutex);
+
 	blk_mq_del_queue_tag_set(q);
 
 	blk_mq_exit_hw_queues(q, set, set->nr_hw_queues);
 	blk_mq_free_hw_queues(q, set);
 
 	percpu_ref_exit(&q->mq_usage_counter);
-
-	kfree(q->mq_map);
-
-	q->mq_map = NULL;
-
-	mutex_lock(&all_q_mutex);
-	list_del_init(&q->all_q_node);
-	mutex_unlock(&all_q_mutex);
 }
 
 /* Basically redo blk_mq_init_queue with queue frozen */
-static void blk_mq_queue_reinit(struct request_queue *q)
+static void blk_mq_queue_reinit(struct request_queue *q,
+				const struct cpumask *online_mask)
 {
 	WARN_ON_ONCE(!atomic_read(&q->mq_freeze_depth));
 
 	blk_mq_sysfs_unregister(q);
 
-	blk_mq_update_queue_map(q->mq_map, q->nr_hw_queues);
+	blk_mq_update_queue_map(q->mq_map, q->nr_hw_queues, online_mask);
 
 	/*
 	 * redo blk_mq_init_cpu_queues and blk_mq_init_hw_queues. FIXME: maybe
@@ -2088,7 +2097,7 @@ static void blk_mq_queue_reinit(struct request_queue *q)
 	 * involves free and re-allocate memory, worthy doing?)
 	 */
 
-	blk_mq_map_swqueue(q);
+	blk_mq_map_swqueue(q, online_mask);
 
 	blk_mq_sysfs_register(q);
 }
@@ -2097,16 +2106,43 @@ static int blk_mq_queue_reinit_notify(struct notifier_block *nb,
 				      unsigned long action, void *hcpu)
 {
 	struct request_queue *q;
+	int cpu = (unsigned long)hcpu;
+	/*
+	 * New online cpumask which is going to be set in this hotplug event.
+	 * Declare this cpumasks as global as cpu-hotplug operation is invoked
+	 * one-by-one and dynamically allocating this could result in a failure.
+	 */
+	static struct cpumask online_new;
 
 	/*
-	 * Before new mappings are established, hotadded cpu might already
-	 * start handling requests. This doesn't break anything as we map
-	 * offline CPUs to first hardware queue. We will re-init the queue
-	 * below to get optimal settings.
+	 * Before hotadded cpu starts handling requests, new mappings must
+	 * be established.  Otherwise, these requests in hw queue might
+	 * never be dispatched.
+	 *
+	 * For example, there is a single hw queue (hctx) and two CPU queues
+	 * (ctx0 for CPU0, and ctx1 for CPU1).
+	 *
+	 * Now CPU1 is just onlined and a request is inserted into
+	 * ctx1->rq_list and set bit0 in pending bitmap as ctx1->index_hw is
+	 * still zero.
+	 *
+	 * And then while running hw queue, flush_busy_ctxs() finds bit0 is
+	 * set in pending bitmap and tries to retrieve requests in
+	 * hctx->ctxs[0]->rq_list.  But htx->ctxs[0] is a pointer to ctx0,
+	 * so the request in ctx1->rq_list is ignored.
 	 */
-	if (action != CPU_DEAD && action != CPU_DEAD_FROZEN &&
-	    action != CPU_ONLINE && action != CPU_ONLINE_FROZEN)
+	switch (action & ~CPU_TASKS_FROZEN) {
+	case CPU_DEAD:
+	case CPU_UP_CANCELED:
+		cpumask_copy(&online_new, cpu_online_mask);
+		break;
+	case CPU_UP_PREPARE:
+		cpumask_copy(&online_new, cpu_online_mask);
+		cpumask_set_cpu(cpu, &online_new);
+		break;
+	default:
 		return NOTIFY_OK;
+	}
 
 	mutex_lock(&all_q_mutex);
@@ -2130,7 +2166,7 @@ static int blk_mq_queue_reinit_notify(struct notifier_block *nb,
 	}
 
 	list_for_each_entry(q, &all_q_list, all_q_node)
-		blk_mq_queue_reinit(q);
+		blk_mq_queue_reinit(q, &online_new);
 
 	list_for_each_entry(q, &all_q_list, all_q_node)
 		blk_mq_unfreeze_queue(q);
...
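For illustration, a userspace model of the rq->errors race that the new blk_mq_complete_request(rq, error) signature closes: a normal completion and a timeout-driven completion can race, and only the caller that wins the atomic mark-complete step may store the error, so a late -EIO can no longer overwrite the status of a request that has already completed. The types and names below are invented for the sketch.

/*
 * Toy model of "set the error only if we won blk_mark_rq_complete()".
 * Not kernel code.
 */
#include <stdatomic.h>
#include <stdio.h>

struct toy_request {
	atomic_flag completed;
	int errors;
};

static void toy_complete_request(struct toy_request *rq, int error)
{
	/* analogue of blk_mark_rq_complete(): only one caller wins */
	if (!atomic_flag_test_and_set(&rq->completed)) {
		rq->errors = error;
		/* ...the real completion work would run here... */
	}
}

int main(void)
{
	struct toy_request rq = { ATOMIC_FLAG_INIT, 0 };

	toy_complete_request(&rq, 0);		/* normal completion wins */
	toy_complete_request(&rq, -5);		/* racing timeout (-EIO): ignored */
	printf("rq.errors = %d\n", rq.errors);	/* still 0 */
	return 0;
}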
@@ -51,7 +51,8 @@ void blk_mq_disable_hotplug(void);
  * CPU -> queue mappings
  */
 extern unsigned int *blk_mq_make_queue_map(struct blk_mq_tag_set *set);
-extern int blk_mq_update_queue_map(unsigned int *map, unsigned int nr_queues);
+extern int blk_mq_update_queue_map(unsigned int *map, unsigned int nr_queues,
+				   const struct cpumask *online_mask);
 extern int blk_mq_hw_queue_to_node(unsigned int *map, unsigned int);
 
 /*
...
@@ -1486,17 +1486,16 @@ static void loop_handle_cmd(struct loop_cmd *cmd)
 {
 	const bool write = cmd->rq->cmd_flags & REQ_WRITE;
 	struct loop_device *lo = cmd->rq->q->queuedata;
-	int ret = -EIO;
+	int ret = 0;
 
-	if (write && (lo->lo_flags & LO_FLAGS_READ_ONLY))
+	if (write && (lo->lo_flags & LO_FLAGS_READ_ONLY)) {
+		ret = -EIO;
 		goto failed;
+	}
 
 	ret = do_req_filebacked(lo, cmd->rq);
 
  failed:
-	if (ret)
-		cmd->rq->errors = -EIO;
-	blk_mq_complete_request(cmd->rq);
+	blk_mq_complete_request(cmd->rq, ret ? -EIO : 0);
 }
 
 static void loop_queue_write_work(struct work_struct *work)
...
@@ -289,7 +289,7 @@ static inline void null_handle_cmd(struct nullb_cmd *cmd)
 	case NULL_IRQ_SOFTIRQ:
 		switch (queue_mode)  {
 		case NULL_Q_MQ:
-			blk_mq_complete_request(cmd->rq);
+			blk_mq_complete_request(cmd->rq, cmd->rq->errors);
 			break;
 		case NULL_Q_RQ:
 			blk_complete_request(cmd->rq);
...
@@ -618,16 +618,15 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx,
 			spin_unlock_irqrestore(req->q->queue_lock, flags);
 			return;
 		}
+
 		if (req->cmd_type == REQ_TYPE_DRV_PRIV) {
 			if (cmd_rq->ctx == CMD_CTX_CANCELLED)
-				req->errors = -EINTR;
-			else
-				req->errors = status;
+				status = -EINTR;
 		} else {
-			req->errors = nvme_error_status(status);
+			status = nvme_error_status(status);
 		}
-	} else
-		req->errors = 0;
+	}
+
 	if (req->cmd_type == REQ_TYPE_DRV_PRIV) {
 		u32 result = le32_to_cpup(&cqe->result);
 		req->special = (void *)(uintptr_t)result;
@@ -650,7 +649,7 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx,
 	}
 	nvme_free_iod(nvmeq->dev, iod);
 
-	blk_mq_complete_request(req);
+	blk_mq_complete_request(req, status);
 }
 
 /* length is in bytes.  gfp flags indicates whether we may sleep. */
@@ -863,8 +862,7 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
 	if (ns && ns->ms && !blk_integrity_rq(req)) {
 		if (!(ns->pi_type && ns->ms == 8) &&
 					req->cmd_type != REQ_TYPE_DRV_PRIV) {
-			req->errors = -EFAULT;
-			blk_mq_complete_request(req);
+			blk_mq_complete_request(req, -EFAULT);
 			return BLK_MQ_RQ_QUEUE_OK;
 		}
 	}
@@ -2439,6 +2437,22 @@ static void nvme_scan_namespaces(struct nvme_dev *dev, unsigned nn)
 	list_sort(NULL, &dev->namespaces, ns_cmp);
 }
 
+static void nvme_set_irq_hints(struct nvme_dev *dev)
+{
+	struct nvme_queue *nvmeq;
+	int i;
+
+	for (i = 0; i < dev->online_queues; i++) {
+		nvmeq = dev->queues[i];
+
+		if (!nvmeq->tags || !(*nvmeq->tags))
+			continue;
+
+		irq_set_affinity_hint(dev->entry[nvmeq->cq_vector].vector,
+					blk_mq_tags_cpumask(*nvmeq->tags));
+	}
+}
+
 static void nvme_dev_scan(struct work_struct *work)
 {
 	struct nvme_dev *dev = container_of(work, struct nvme_dev, scan_work);
@@ -2450,6 +2464,7 @@ static void nvme_dev_scan(struct work_struct *work)
 		return;
 	nvme_scan_namespaces(dev, le32_to_cpup(&ctrl->nn));
 	kfree(ctrl);
+	nvme_set_irq_hints(dev);
 }
 
 /*
@@ -2953,22 +2968,6 @@ static const struct file_operations nvme_dev_fops = {
 	.compat_ioctl	= nvme_dev_ioctl,
 };
 
-static void nvme_set_irq_hints(struct nvme_dev *dev)
-{
-	struct nvme_queue *nvmeq;
-	int i;
-
-	for (i = 0; i < dev->online_queues; i++) {
-		nvmeq = dev->queues[i];
-
-		if (!nvmeq->tags || !(*nvmeq->tags))
-			continue;
-
-		irq_set_affinity_hint(dev->entry[nvmeq->cq_vector].vector,
-					blk_mq_tags_cpumask(*nvmeq->tags));
-	}
-}
-
 static int nvme_dev_start(struct nvme_dev *dev)
 {
 	int result;
@@ -3010,8 +3009,6 @@ static int nvme_dev_start(struct nvme_dev *dev)
 	if (result)
 		goto free_tags;
 
-	nvme_set_irq_hints(dev);
-
 	dev->event_limit = 1;
 	return result;
 
@@ -3062,7 +3059,6 @@ static int nvme_dev_resume(struct nvme_dev *dev)
 	} else {
 		nvme_unfreeze_queues(dev);
 		nvme_dev_add(dev);
-		nvme_set_irq_hints(dev);
 	}
 	return 0;
 }
...
@@ -144,7 +144,7 @@ static void virtblk_done(struct virtqueue *vq)
 	do {
 		virtqueue_disable_cb(vq);
 		while ((vbr = virtqueue_get_buf(vblk->vqs[qid].vq, &len)) != NULL) {
-			blk_mq_complete_request(vbr->req);
+			blk_mq_complete_request(vbr->req, vbr->req->errors);
 			req_done = true;
 		}
 		if (unlikely(virtqueue_is_broken(vq)))
...
@@ -212,6 +212,9 @@ static int xen_blkif_map(struct xen_blkif *blkif, grant_ref_t *gref,
 
 static int xen_blkif_disconnect(struct xen_blkif *blkif)
 {
+	struct pending_req *req, *n;
+	int i = 0, j;
+
 	if (blkif->xenblkd) {
 		kthread_stop(blkif->xenblkd);
 		wake_up(&blkif->shutdown_wq);
@@ -238,13 +241,28 @@ static int xen_blkif_disconnect(struct xen_blkif *blkif)
 	/* Remove all persistent grants and the cache of ballooned pages. */
 	xen_blkbk_free_caches(blkif);
 
+	/* Check that there is no request in use */
+	list_for_each_entry_safe(req, n, &blkif->pending_free, free_list) {
+		list_del(&req->free_list);
+
+		for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++)
+			kfree(req->segments[j]);
+
+		for (j = 0; j < MAX_INDIRECT_PAGES; j++)
+			kfree(req->indirect_pages[j]);
+
+		kfree(req);
+		i++;
+	}
+
+	WARN_ON(i != (XEN_BLKIF_REQS_PER_PAGE * blkif->nr_ring_pages));
+	blkif->nr_ring_pages = 0;
+
 	return 0;
 }
 
 static void xen_blkif_free(struct xen_blkif *blkif)
 {
-	struct pending_req *req, *n;
-	int i = 0, j;
-
 	xen_blkif_disconnect(blkif);
 	xen_vbd_free(&blkif->vbd);
 
@@ -257,22 +275,6 @@ static void xen_blkif_free(struct xen_blkif *blkif)
 	BUG_ON(!list_empty(&blkif->free_pages));
 	BUG_ON(!RB_EMPTY_ROOT(&blkif->persistent_gnts));
 
-	/* Check that there is no request in use */
-	list_for_each_entry_safe(req, n, &blkif->pending_free, free_list) {
-		list_del(&req->free_list);
-
-		for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++)
-			kfree(req->segments[j]);
-
-		for (j = 0; j < MAX_INDIRECT_PAGES; j++)
-			kfree(req->indirect_pages[j]);
-
-		kfree(req);
-		i++;
-	}
-
-	WARN_ON(i != (XEN_BLKIF_REQS_PER_PAGE * blkif->nr_ring_pages));
-
 	kmem_cache_free(xen_blkif_cachep, blkif);
 }
...
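For illustration, a userspace sketch of the lifecycle change above: the per-connection request structs are now torn down at disconnect time, so a later reconnect (possibly with a different number of ring pages) starts from a clean free list, and the count check mirrors the WARN_ON() in the diff. All names below are invented; this is not the driver's code.

/*
 * Toy connect/disconnect lifecycle with a per-connection free list.
 */
#include <stdio.h>
#include <stdlib.h>

#define REQS_PER_PAGE 32

struct toy_req {
	struct toy_req *next;
};

static struct toy_req *pending_free;
static unsigned int nr_ring_pages;

static void toy_connect(unsigned int ring_pages)
{
	unsigned int i;

	nr_ring_pages = ring_pages;
	for (i = 0; i < REQS_PER_PAGE * ring_pages; i++) {
		struct toy_req *req = malloc(sizeof(*req));

		req->next = pending_free;
		pending_free = req;
	}
}

static void toy_disconnect(void)
{
	unsigned int freed = 0;

	while (pending_free) {
		struct toy_req *req = pending_free;

		pending_free = req->next;
		free(req);
		freed++;
	}
	/* every request must be back on the free list by disconnect time */
	if (freed != REQS_PER_PAGE * nr_ring_pages)
		fprintf(stderr, "requests still in flight at disconnect\n");
	nr_ring_pages = 0;
}

int main(void)
{
	toy_connect(1);
	toy_disconnect();
	toy_connect(4);		/* reconnect with a bigger ring starts clean */
	toy_disconnect();
	return 0;
}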
@@ -1142,6 +1142,7 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
 	RING_IDX i, rp;
 	unsigned long flags;
 	struct blkfront_info *info = (struct blkfront_info *)dev_id;
+	int error;
 
 	spin_lock_irqsave(&info->io_lock, flags);
 
@@ -1182,37 +1183,37 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
 			continue;
 		}
 
-		req->errors = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO;
+		error = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO;
 		switch (bret->operation) {
 		case BLKIF_OP_DISCARD:
 			if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
 				struct request_queue *rq = info->rq;
 				printk(KERN_WARNING "blkfront: %s: %s op failed\n",
 					   info->gd->disk_name, op_name(bret->operation));
-				req->errors = -EOPNOTSUPP;
+				error = -EOPNOTSUPP;
 				info->feature_discard = 0;
 				info->feature_secdiscard = 0;
 				queue_flag_clear(QUEUE_FLAG_DISCARD, rq);
 				queue_flag_clear(QUEUE_FLAG_SECDISCARD, rq);
 			}
-			blk_mq_complete_request(req);
+			blk_mq_complete_request(req, error);
 			break;
 		case BLKIF_OP_FLUSH_DISKCACHE:
 		case BLKIF_OP_WRITE_BARRIER:
 			if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
 				printk(KERN_WARNING "blkfront: %s: %s op failed\n",
 				       info->gd->disk_name, op_name(bret->operation));
-				req->errors = -EOPNOTSUPP;
+				error = -EOPNOTSUPP;
 			}
 			if (unlikely(bret->status == BLKIF_RSP_ERROR &&
 				     info->shadow[id].req.u.rw.nr_segments == 0)) {
 				printk(KERN_WARNING "blkfront: %s: empty %s op failed\n",
 				       info->gd->disk_name, op_name(bret->operation));
-				req->errors = -EOPNOTSUPP;
+				error = -EOPNOTSUPP;
 			}
-			if (unlikely(req->errors)) {
-				if (req->errors == -EOPNOTSUPP)
-					req->errors = 0;
+			if (unlikely(error)) {
+				if (error == -EOPNOTSUPP)
+					error = 0;
 				info->feature_flush = 0;
 				xlvbd_flush(info);
 			}
@@ -1223,7 +1224,7 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
 				dev_dbg(&info->xbdev->dev, "Bad return from blkdev data "
 					"request: %x\n", bret->status);
 
-			blk_mq_complete_request(req);
+			blk_mq_complete_request(req, error);
 			break;
 		default:
 			BUG();
...
@@ -1957,7 +1957,7 @@ static int scsi_mq_prep_fn(struct request *req)
 static void scsi_mq_done(struct scsi_cmnd *cmd)
 {
 	trace_scsi_dispatch_cmd_done(cmd);
-	blk_mq_complete_request(cmd->request);
+	blk_mq_complete_request(cmd->request, cmd->request->errors);
 }
 
 static int scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
...
@@ -145,7 +145,6 @@ enum {
 	BLK_MQ_F_SHOULD_MERGE	= 1 << 0,
 	BLK_MQ_F_TAG_SHARED	= 1 << 1,
 	BLK_MQ_F_SG_MERGE	= 1 << 2,
-	BLK_MQ_F_SYSFS_UP	= 1 << 3,
 	BLK_MQ_F_DEFER_ISSUE	= 1 << 4,
 	BLK_MQ_F_ALLOC_POLICY_START_BIT = 8,
 	BLK_MQ_F_ALLOC_POLICY_BITS = 1,
@@ -215,7 +214,7 @@ void blk_mq_add_to_requeue_list(struct request *rq, bool at_head);
 void blk_mq_cancel_requeue_work(struct request_queue *q);
 void blk_mq_kick_requeue_list(struct request_queue *q);
 void blk_mq_abort_requeue_list(struct request_queue *q);
-void blk_mq_complete_request(struct request *rq);
+void blk_mq_complete_request(struct request *rq, int error);
 
 void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx);
 void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx);
@@ -224,8 +223,6 @@ void blk_mq_start_hw_queues(struct request_queue *q);
 void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async);
 void blk_mq_run_hw_queues(struct request_queue *q, bool async);
 void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs);
-void blk_mq_tag_busy_iter(struct blk_mq_hw_ctx *hctx, busy_iter_fn *fn,
-		void *priv);
 void blk_mq_all_tag_busy_iter(struct blk_mq_tags *tags, busy_tag_iter_fn *fn,
 		void *priv);
 void blk_mq_freeze_queue(struct request_queue *q);
...
@@ -456,6 +456,8 @@ struct request_queue {
 	struct blk_mq_tag_set	*tag_set;
 	struct list_head	tag_set_list;
 	struct bio_set		*bio_split;
+
+	bool			mq_sysfs_init_done;
 };
 
 #define QUEUE_FLAG_QUEUED	1	/* uses generic tag queueing */
...