Commit e3a2b3f9 authored by Jens Axboe

blk-mq: allow changing of queue depth through sysfs

For request_fn based devices, the block layer exports a 'nr_requests'
file through sysfs to allow adjusting of queue depth on the fly.
Currently this returns -EINVAL for blk-mq, since it's not wired up.
Wire this up for blk-mq, so that it now also allows dynamic
adjustments of the allowed queue depth for any given block device
managed by blk-mq.
Signed-off-by: Jens Axboe <axboe@fb.com>
parent 64b14519
...@@ -848,6 +848,47 @@ static void freed_request(struct request_list *rl, unsigned int flags) ...@@ -848,6 +848,47 @@ static void freed_request(struct request_list *rl, unsigned int flags)
__freed_request(rl, sync ^ 1); __freed_request(rl, sync ^ 1);
} }
int blk_update_nr_requests(struct request_queue *q, unsigned int nr)
{
struct request_list *rl;
spin_lock_irq(q->queue_lock);
q->nr_requests = nr;
blk_queue_congestion_threshold(q);
/* congestion isn't cgroup aware and follows root blkcg for now */
rl = &q->root_rl;
if (rl->count[BLK_RW_SYNC] >= queue_congestion_on_threshold(q))
blk_set_queue_congested(q, BLK_RW_SYNC);
else if (rl->count[BLK_RW_SYNC] < queue_congestion_off_threshold(q))
blk_clear_queue_congested(q, BLK_RW_SYNC);
if (rl->count[BLK_RW_ASYNC] >= queue_congestion_on_threshold(q))
blk_set_queue_congested(q, BLK_RW_ASYNC);
else if (rl->count[BLK_RW_ASYNC] < queue_congestion_off_threshold(q))
blk_clear_queue_congested(q, BLK_RW_ASYNC);
blk_queue_for_each_rl(rl, q) {
if (rl->count[BLK_RW_SYNC] >= q->nr_requests) {
blk_set_rl_full(rl, BLK_RW_SYNC);
} else {
blk_clear_rl_full(rl, BLK_RW_SYNC);
wake_up(&rl->wait[BLK_RW_SYNC]);
}
if (rl->count[BLK_RW_ASYNC] >= q->nr_requests) {
blk_set_rl_full(rl, BLK_RW_ASYNC);
} else {
blk_clear_rl_full(rl, BLK_RW_ASYNC);
wake_up(&rl->wait[BLK_RW_ASYNC]);
}
}
spin_unlock_irq(q->queue_lock);
return 0;
}
/* /*
* Determine if elevator data should be initialized when allocating the * Determine if elevator data should be initialized when allocating the
* request associated with @bio. * request associated with @bio.
......
...@@ -57,23 +57,13 @@ bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx) ...@@ -57,23 +57,13 @@ bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
} }
/* /*
* If a previously busy queue goes inactive, potential waiters could now * Wakeup all potentially sleeping on normal (non-reserved) tags
* be allowed to queue. Wake them up and check.
*/ */
void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx) static void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags)
{ {
struct blk_mq_tags *tags = hctx->tags;
struct blk_mq_bitmap_tags *bt; struct blk_mq_bitmap_tags *bt;
int i, wake_index; int i, wake_index;
if (!test_and_clear_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
return;
atomic_dec(&tags->active_queues);
/*
* Will only throttle depth on non-reserved tags
*/
bt = &tags->bitmap_tags; bt = &tags->bitmap_tags;
wake_index = bt->wake_index; wake_index = bt->wake_index;
for (i = 0; i < BT_WAIT_QUEUES; i++) { for (i = 0; i < BT_WAIT_QUEUES; i++) {
...@@ -86,6 +76,22 @@ void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx) ...@@ -86,6 +76,22 @@ void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
} }
} }
/*
 * A hardware queue that was marked tag-active is going idle. Drop it from
 * the active-queue count and wake everyone sleeping on normal tags, since
 * they may now be allowed to queue. Does nothing if the queue was not
 * marked active.
 */
void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
{
	struct blk_mq_tags *tags = hctx->tags;

	/* Only the caller that actually clears the bit does the accounting. */
	if (test_and_clear_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state)) {
		atomic_dec(&tags->active_queues);
		blk_mq_tag_wakeup_all(tags);
	}
}
/* /*
* For shared tag users, we track the number of currently active users * For shared tag users, we track the number of currently active users
* and attempt to provide a fair share of the tag depth for each of them. * and attempt to provide a fair share of the tag depth for each of them.
...@@ -408,6 +414,28 @@ static unsigned int bt_unused_tags(struct blk_mq_bitmap_tags *bt) ...@@ -408,6 +414,28 @@ static unsigned int bt_unused_tags(struct blk_mq_bitmap_tags *bt)
return bt->depth - used; return bt->depth - used;
} }
/*
 * bt_update_count - set the usable depth of a tag bitmap
 * @bt:    bitmap to update; bt->map and bt->map_nr must already be set up
 * @depth: number of tags that may be handed out
 *
 * Distributes @depth over the bitmap words in order, filling each word up
 * to 1 << bt->bits_per_word tags until the depth is used up, then derives
 * the wakeup batch size and records the new depth. When @depth is zero the
 * per-word depths are left as they are.
 */
static void bt_update_count(struct blk_mq_bitmap_tags *bt,
			    unsigned int depth)
{
	unsigned int word_capacity = 1U << bt->bits_per_word;
	unsigned int remaining = depth;

	if (depth) {
		int word;

		for (word = 0; word < bt->map_nr; word++) {
			unsigned int share = min(remaining, word_capacity);

			bt->map[word].depth = share;
			remaining -= share;
		}
	}

	/* Batch wakeups, but never by more than a quarter of the depth. */
	bt->wake_cnt = BT_WAIT_BATCH;
	if (bt->wake_cnt > depth / 4)
		bt->wake_cnt = max(1U, depth / 4);

	bt->depth = depth;
}
static int bt_alloc(struct blk_mq_bitmap_tags *bt, unsigned int depth, static int bt_alloc(struct blk_mq_bitmap_tags *bt, unsigned int depth,
int node, bool reserved) int node, bool reserved)
{ {
...@@ -420,7 +448,7 @@ static int bt_alloc(struct blk_mq_bitmap_tags *bt, unsigned int depth, ...@@ -420,7 +448,7 @@ static int bt_alloc(struct blk_mq_bitmap_tags *bt, unsigned int depth,
* condition. * condition.
*/ */
if (depth) { if (depth) {
unsigned int nr, i, map_depth, tags_per_word; unsigned int nr, tags_per_word;
tags_per_word = (1 << bt->bits_per_word); tags_per_word = (1 << bt->bits_per_word);
...@@ -444,11 +472,6 @@ static int bt_alloc(struct blk_mq_bitmap_tags *bt, unsigned int depth, ...@@ -444,11 +472,6 @@ static int bt_alloc(struct blk_mq_bitmap_tags *bt, unsigned int depth,
return -ENOMEM; return -ENOMEM;
bt->map_nr = nr; bt->map_nr = nr;
map_depth = depth;
for (i = 0; i < nr; i++) {
bt->map[i].depth = min(map_depth, tags_per_word);
map_depth -= tags_per_word;
}
} }
bt->bs = kzalloc(BT_WAIT_QUEUES * sizeof(*bt->bs), GFP_KERNEL); bt->bs = kzalloc(BT_WAIT_QUEUES * sizeof(*bt->bs), GFP_KERNEL);
...@@ -460,11 +483,7 @@ static int bt_alloc(struct blk_mq_bitmap_tags *bt, unsigned int depth, ...@@ -460,11 +483,7 @@ static int bt_alloc(struct blk_mq_bitmap_tags *bt, unsigned int depth,
for (i = 0; i < BT_WAIT_QUEUES; i++) for (i = 0; i < BT_WAIT_QUEUES; i++)
init_waitqueue_head(&bt->bs[i].wait); init_waitqueue_head(&bt->bs[i].wait);
bt->wake_cnt = BT_WAIT_BATCH; bt_update_count(bt, depth);
if (bt->wake_cnt > depth / 4)
bt->wake_cnt = max(1U, depth / 4);
bt->depth = depth;
return 0; return 0;
} }
...@@ -525,6 +544,21 @@ void blk_mq_tag_init_last_tag(struct blk_mq_tags *tags, unsigned int *tag) ...@@ -525,6 +544,21 @@ void blk_mq_tag_init_last_tag(struct blk_mq_tags *tags, unsigned int *tag)
*tag = prandom_u32() % depth; *tag = prandom_u32() % depth;
} }
/*
 * blk_mq_tag_update_depth - change the depth of the normal tag bitmap
 * @tags:   tag map to update
 * @tdepth: requested total depth, including the reserved tags
 *
 * The reserved tag count is subtracted from @tdepth and the remainder is
 * applied to the normal (non-reserved) bitmap. Anyone sleeping on a normal
 * tag is then woken so the new depth is re-evaluated.
 *
 * Returns 0 on success, or -EINVAL if @tdepth is smaller than the reserved
 * tag count or the remaining depth exceeds tags->nr_tags.
 */
int blk_mq_tag_update_depth(struct blk_mq_tags *tags, unsigned int tdepth)
{
	/*
	 * tdepth is unsigned: reject a too-small request explicitly instead
	 * of relying on the subtraction wrapping to a huge value that the
	 * range check below happens to catch.
	 */
	if (tdepth < tags->nr_reserved_tags)
		return -EINVAL;

	tdepth -= tags->nr_reserved_tags;
	if (tdepth > tags->nr_tags)
		return -EINVAL;

	/*
	 * Don't need (or can't) update reserved tags here, they remain
	 * static and should never need resizing.
	 */
	bt_update_count(&tags->bitmap_tags, tdepth);
	blk_mq_tag_wakeup_all(tags);
	return 0;
}
ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page) ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page)
{ {
char *orig_page = page; char *orig_page = page;
......
...@@ -55,6 +55,7 @@ extern void blk_mq_tag_busy_iter(struct blk_mq_tags *tags, void (*fn)(void *data ...@@ -55,6 +55,7 @@ extern void blk_mq_tag_busy_iter(struct blk_mq_tags *tags, void (*fn)(void *data
extern bool blk_mq_has_free_tags(struct blk_mq_tags *tags); extern bool blk_mq_has_free_tags(struct blk_mq_tags *tags);
extern ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page); extern ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page);
extern void blk_mq_tag_init_last_tag(struct blk_mq_tags *tags, unsigned int *last_tag); extern void blk_mq_tag_init_last_tag(struct blk_mq_tags *tags, unsigned int *last_tag);
extern int blk_mq_tag_update_depth(struct blk_mq_tags *tags, unsigned int depth);
enum { enum {
BLK_MQ_TAG_CACHE_MIN = 1, BLK_MQ_TAG_CACHE_MIN = 1,
......
...@@ -1789,6 +1789,28 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set) ...@@ -1789,6 +1789,28 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set)
} }
EXPORT_SYMBOL(blk_mq_free_tag_set); EXPORT_SYMBOL(blk_mq_free_tag_set);
/*
 * blk_mq_update_nr_requests - apply a new queue depth to a blk-mq queue
 * @q:  queue to update
 * @nr: requested depth
 *
 * Fails with -EINVAL when the queue has no tag set or @nr is larger than
 * the tag set's queue_depth. Otherwise the tag depth of every hardware
 * queue is updated in turn; the first failure is returned immediately and
 * q->nr_requests is left unchanged (hardware queues updated before the
 * failure are not rolled back). On success q->nr_requests becomes @nr and
 * 0 is returned.
 */
int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr)
{
	struct blk_mq_tag_set *set = q->tag_set;
	struct blk_mq_hw_ctx *hctx;
	int i;

	if (!set)
		return -EINVAL;
	if (nr > set->queue_depth)
		return -EINVAL;

	queue_for_each_hw_ctx(q, hctx, i) {
		int err = blk_mq_tag_update_depth(hctx->tags, nr);

		if (err)
			return err;
	}

	q->nr_requests = nr;
	return 0;
}
void blk_mq_disable_hotplug(void) void blk_mq_disable_hotplug(void)
{ {
mutex_lock(&all_q_mutex); mutex_lock(&all_q_mutex);
......
...@@ -32,6 +32,7 @@ void blk_mq_drain_queue(struct request_queue *q); ...@@ -32,6 +32,7 @@ void blk_mq_drain_queue(struct request_queue *q);
void blk_mq_free_queue(struct request_queue *q); void blk_mq_free_queue(struct request_queue *q);
void blk_mq_clone_flush_request(struct request *flush_rq, void blk_mq_clone_flush_request(struct request *flush_rq,
struct request *orig_rq); struct request *orig_rq);
int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr);
/* /*
* CPU hotplug helpers * CPU hotplug helpers
......
...@@ -48,11 +48,10 @@ static ssize_t queue_requests_show(struct request_queue *q, char *page) ...@@ -48,11 +48,10 @@ static ssize_t queue_requests_show(struct request_queue *q, char *page)
static ssize_t static ssize_t
queue_requests_store(struct request_queue *q, const char *page, size_t count) queue_requests_store(struct request_queue *q, const char *page, size_t count)
{ {
struct request_list *rl;
unsigned long nr; unsigned long nr;
int ret; int ret, err;
if (!q->request_fn) if (!q->request_fn && !q->mq_ops)
return -EINVAL; return -EINVAL;
ret = queue_var_store(&nr, page, count); ret = queue_var_store(&nr, page, count);
...@@ -62,40 +61,14 @@ queue_requests_store(struct request_queue *q, const char *page, size_t count) ...@@ -62,40 +61,14 @@ queue_requests_store(struct request_queue *q, const char *page, size_t count)
if (nr < BLKDEV_MIN_RQ) if (nr < BLKDEV_MIN_RQ)
nr = BLKDEV_MIN_RQ; nr = BLKDEV_MIN_RQ;
spin_lock_irq(q->queue_lock); if (q->request_fn)
q->nr_requests = nr; err = blk_update_nr_requests(q, nr);
blk_queue_congestion_threshold(q); else
err = blk_mq_update_nr_requests(q, nr);
/* congestion isn't cgroup aware and follows root blkcg for now */
rl = &q->root_rl;
if (rl->count[BLK_RW_SYNC] >= queue_congestion_on_threshold(q))
blk_set_queue_congested(q, BLK_RW_SYNC);
else if (rl->count[BLK_RW_SYNC] < queue_congestion_off_threshold(q))
blk_clear_queue_congested(q, BLK_RW_SYNC);
if (rl->count[BLK_RW_ASYNC] >= queue_congestion_on_threshold(q))
blk_set_queue_congested(q, BLK_RW_ASYNC);
else if (rl->count[BLK_RW_ASYNC] < queue_congestion_off_threshold(q))
blk_clear_queue_congested(q, BLK_RW_ASYNC);
blk_queue_for_each_rl(rl, q) {
if (rl->count[BLK_RW_SYNC] >= q->nr_requests) {
blk_set_rl_full(rl, BLK_RW_SYNC);
} else {
blk_clear_rl_full(rl, BLK_RW_SYNC);
wake_up(&rl->wait[BLK_RW_SYNC]);
}
if (rl->count[BLK_RW_ASYNC] >= q->nr_requests) { if (err)
blk_set_rl_full(rl, BLK_RW_ASYNC); return err;
} else {
blk_clear_rl_full(rl, BLK_RW_ASYNC);
wake_up(&rl->wait[BLK_RW_ASYNC]);
}
}
spin_unlock_irq(q->queue_lock);
return ret; return ret;
} }
......
...@@ -188,6 +188,8 @@ static inline int queue_congestion_off_threshold(struct request_queue *q) ...@@ -188,6 +188,8 @@ static inline int queue_congestion_off_threshold(struct request_queue *q)
return q->nr_congestion_off; return q->nr_congestion_off;
} }
extern int blk_update_nr_requests(struct request_queue *, unsigned int);
/* /*
* Contribute to IO statistics IFF: * Contribute to IO statistics IFF:
* *
......
...@@ -63,7 +63,7 @@ struct blk_mq_hw_ctx { ...@@ -63,7 +63,7 @@ struct blk_mq_hw_ctx {
struct blk_mq_tag_set { struct blk_mq_tag_set {
struct blk_mq_ops *ops; struct blk_mq_ops *ops;
unsigned int nr_hw_queues; unsigned int nr_hw_queues;
unsigned int queue_depth; unsigned int queue_depth; /* max hw supported */
unsigned int reserved_tags; unsigned int reserved_tags;
unsigned int cmd_size; /* per-request extra data */ unsigned int cmd_size; /* per-request extra data */
int numa_node; int numa_node;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment