Commit 2a5a24aa authored by Ming Lei, committed by Martin K. Petersen

scsi: blk-mq: Return budget token from .get_budget callback

SCSI uses a global atomic variable to track queue depth for each
LUN/request queue.

This doesn't scale well when there are lots of CPU cores and the disk is
very fast. It has been observed that IOPS is affected a lot by tracking
queue depth via sdev->device_busy in the I/O path.

Return budget token from .get_budget callback. The budget token can be
passed to driver so that we can replace the atomic variable with
sbitmap_queue and alleviate the scaling problems that way.

Link: https://lore.kernel.org/r/20210122023317.687987-9-ming.lei@redhat.com
Cc: Omar Sandoval <osandov@fb.com>
Cc: Kashyap Desai <kashyap.desai@broadcom.com>
Cc: Sumanesh Samanta <sumanesh.samanta@broadcom.com>
Cc: Ewan D. Milne <emilne@redhat.com>
Tested-by: Sumanesh Samanta <sumanesh.samanta@broadcom.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
parent d022d18c
...@@ -131,6 +131,7 @@ static int __blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx) ...@@ -131,6 +131,7 @@ static int __blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
do { do {
struct request *rq; struct request *rq;
int budget_token;
if (e->type->ops.has_work && !e->type->ops.has_work(hctx)) if (e->type->ops.has_work && !e->type->ops.has_work(hctx))
break; break;
...@@ -140,12 +141,13 @@ static int __blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx) ...@@ -140,12 +141,13 @@ static int __blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
break; break;
} }
if (!blk_mq_get_dispatch_budget(q)) budget_token = blk_mq_get_dispatch_budget(q);
if (budget_token < 0)
break; break;
rq = e->type->ops.dispatch_request(hctx); rq = e->type->ops.dispatch_request(hctx);
if (!rq) { if (!rq) {
blk_mq_put_dispatch_budget(q); blk_mq_put_dispatch_budget(q, budget_token);
/* /*
* We're releasing without dispatching. Holding the * We're releasing without dispatching. Holding the
* budget could have blocked any "hctx"s with the * budget could have blocked any "hctx"s with the
...@@ -157,6 +159,8 @@ static int __blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx) ...@@ -157,6 +159,8 @@ static int __blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
break; break;
} }
blk_mq_set_rq_budget_token(rq, budget_token);
/* /*
* Now this rq owns the budget which has to be released * Now this rq owns the budget which has to be released
* if this rq won't be queued to driver via .queue_rq() * if this rq won't be queued to driver via .queue_rq()
...@@ -230,6 +234,8 @@ static int blk_mq_do_dispatch_ctx(struct blk_mq_hw_ctx *hctx) ...@@ -230,6 +234,8 @@ static int blk_mq_do_dispatch_ctx(struct blk_mq_hw_ctx *hctx)
struct request *rq; struct request *rq;
do { do {
int budget_token;
if (!list_empty_careful(&hctx->dispatch)) { if (!list_empty_careful(&hctx->dispatch)) {
ret = -EAGAIN; ret = -EAGAIN;
break; break;
...@@ -238,12 +244,13 @@ static int blk_mq_do_dispatch_ctx(struct blk_mq_hw_ctx *hctx) ...@@ -238,12 +244,13 @@ static int blk_mq_do_dispatch_ctx(struct blk_mq_hw_ctx *hctx)
if (!sbitmap_any_bit_set(&hctx->ctx_map)) if (!sbitmap_any_bit_set(&hctx->ctx_map))
break; break;
if (!blk_mq_get_dispatch_budget(q)) budget_token = blk_mq_get_dispatch_budget(q);
if (budget_token < 0)
break; break;
rq = blk_mq_dequeue_from_ctx(hctx, ctx); rq = blk_mq_dequeue_from_ctx(hctx, ctx);
if (!rq) { if (!rq) {
blk_mq_put_dispatch_budget(q); blk_mq_put_dispatch_budget(q, budget_token);
/* /*
* We're releasing without dispatching. Holding the * We're releasing without dispatching. Holding the
* budget could have blocked any "hctx"s with the * budget could have blocked any "hctx"s with the
...@@ -255,6 +262,8 @@ static int blk_mq_do_dispatch_ctx(struct blk_mq_hw_ctx *hctx) ...@@ -255,6 +262,8 @@ static int blk_mq_do_dispatch_ctx(struct blk_mq_hw_ctx *hctx)
break; break;
} }
blk_mq_set_rq_budget_token(rq, budget_token);
/* /*
* Now this rq owns the budget which has to be released * Now this rq owns the budget which has to be released
* if this rq won't be queued to driver via .queue_rq() * if this rq won't be queued to driver via .queue_rq()
......
...@@ -1277,11 +1277,16 @@ static enum prep_dispatch blk_mq_prep_dispatch_rq(struct request *rq, ...@@ -1277,11 +1277,16 @@ static enum prep_dispatch blk_mq_prep_dispatch_rq(struct request *rq,
bool need_budget) bool need_budget)
{ {
struct blk_mq_hw_ctx *hctx = rq->mq_hctx; struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
int budget_token = -1;
if (need_budget && !blk_mq_get_dispatch_budget(rq->q)) { if (need_budget) {
budget_token = blk_mq_get_dispatch_budget(rq->q);
if (budget_token < 0) {
blk_mq_put_driver_tag(rq); blk_mq_put_driver_tag(rq);
return PREP_DISPATCH_NO_BUDGET; return PREP_DISPATCH_NO_BUDGET;
} }
blk_mq_set_rq_budget_token(rq, budget_token);
}
if (!blk_mq_get_driver_tag(rq)) { if (!blk_mq_get_driver_tag(rq)) {
/* /*
...@@ -1297,7 +1302,7 @@ static enum prep_dispatch blk_mq_prep_dispatch_rq(struct request *rq, ...@@ -1297,7 +1302,7 @@ static enum prep_dispatch blk_mq_prep_dispatch_rq(struct request *rq,
* together during handling partial dispatch * together during handling partial dispatch
*/ */
if (need_budget) if (need_budget)
blk_mq_put_dispatch_budget(rq->q); blk_mq_put_dispatch_budget(rq->q, budget_token);
return PREP_DISPATCH_NO_TAG; return PREP_DISPATCH_NO_TAG;
} }
} }
...@@ -1307,12 +1312,16 @@ static enum prep_dispatch blk_mq_prep_dispatch_rq(struct request *rq, ...@@ -1307,12 +1312,16 @@ static enum prep_dispatch blk_mq_prep_dispatch_rq(struct request *rq,
/* release all allocated budgets before calling to blk_mq_dispatch_rq_list */ /* release all allocated budgets before calling to blk_mq_dispatch_rq_list */
static void blk_mq_release_budgets(struct request_queue *q, static void blk_mq_release_budgets(struct request_queue *q,
unsigned int nr_budgets) struct list_head *list)
{ {
int i; struct request *rq;
for (i = 0; i < nr_budgets; i++) list_for_each_entry(rq, list, queuelist) {
blk_mq_put_dispatch_budget(q); int budget_token = blk_mq_get_rq_budget_token(rq);
if (budget_token >= 0)
blk_mq_put_dispatch_budget(q, budget_token);
}
} }
/* /*
...@@ -1410,7 +1419,8 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list, ...@@ -1410,7 +1419,8 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list,
(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED); (hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED);
bool no_budget_avail = prep == PREP_DISPATCH_NO_BUDGET; bool no_budget_avail = prep == PREP_DISPATCH_NO_BUDGET;
blk_mq_release_budgets(q, nr_budgets); if (nr_budgets)
blk_mq_release_budgets(q, list);
spin_lock(&hctx->lock); spin_lock(&hctx->lock);
list_splice_tail_init(list, &hctx->dispatch); list_splice_tail_init(list, &hctx->dispatch);
...@@ -2009,6 +2019,7 @@ static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, ...@@ -2009,6 +2019,7 @@ static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
{ {
struct request_queue *q = rq->q; struct request_queue *q = rq->q;
bool run_queue = true; bool run_queue = true;
int budget_token;
/* /*
* RCU or SRCU read lock is needed before checking quiesced flag. * RCU or SRCU read lock is needed before checking quiesced flag.
...@@ -2026,11 +2037,14 @@ static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, ...@@ -2026,11 +2037,14 @@ static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
if (q->elevator && !bypass_insert) if (q->elevator && !bypass_insert)
goto insert; goto insert;
if (!blk_mq_get_dispatch_budget(q)) budget_token = blk_mq_get_dispatch_budget(q);
if (budget_token < 0)
goto insert; goto insert;
blk_mq_set_rq_budget_token(rq, budget_token);
if (!blk_mq_get_driver_tag(rq)) { if (!blk_mq_get_driver_tag(rq)) {
blk_mq_put_dispatch_budget(q); blk_mq_put_dispatch_budget(q, budget_token);
goto insert; goto insert;
} }
......
...@@ -187,17 +187,34 @@ unsigned int blk_mq_in_flight(struct request_queue *q, ...@@ -187,17 +187,34 @@ unsigned int blk_mq_in_flight(struct request_queue *q,
void blk_mq_in_flight_rw(struct request_queue *q, struct block_device *part, void blk_mq_in_flight_rw(struct request_queue *q, struct block_device *part,
unsigned int inflight[2]); unsigned int inflight[2]);
static inline void blk_mq_put_dispatch_budget(struct request_queue *q) static inline void blk_mq_put_dispatch_budget(struct request_queue *q,
int budget_token)
{ {
if (q->mq_ops->put_budget) if (q->mq_ops->put_budget)
q->mq_ops->put_budget(q); q->mq_ops->put_budget(q, budget_token);
} }
static inline bool blk_mq_get_dispatch_budget(struct request_queue *q) static inline int blk_mq_get_dispatch_budget(struct request_queue *q)
{ {
if (q->mq_ops->get_budget) if (q->mq_ops->get_budget)
return q->mq_ops->get_budget(q); return q->mq_ops->get_budget(q);
return true; return 0;
}
static inline void blk_mq_set_rq_budget_token(struct request *rq, int token)
{
if (token < 0)
return;
if (rq->q->mq_ops->set_rq_budget_token)
rq->q->mq_ops->set_rq_budget_token(rq, token);
}
static inline int blk_mq_get_rq_budget_token(struct request *rq)
{
if (rq->q->mq_ops->get_rq_budget_token)
return rq->q->mq_ops->get_rq_budget_token(rq);
return -1;
} }
static inline void __blk_mq_inc_active_requests(struct blk_mq_hw_ctx *hctx) static inline void __blk_mq_inc_active_requests(struct blk_mq_hw_ctx *hctx)
......
...@@ -329,6 +329,7 @@ void scsi_device_unbusy(struct scsi_device *sdev, struct scsi_cmnd *cmd) ...@@ -329,6 +329,7 @@ void scsi_device_unbusy(struct scsi_device *sdev, struct scsi_cmnd *cmd)
atomic_dec(&starget->target_busy); atomic_dec(&starget->target_busy);
atomic_dec(&sdev->device_busy); atomic_dec(&sdev->device_busy);
cmd->budget_token = -1;
} }
static void scsi_kick_queue(struct request_queue *q) static void scsi_kick_queue(struct request_queue *q)
...@@ -1143,6 +1144,7 @@ void scsi_init_command(struct scsi_device *dev, struct scsi_cmnd *cmd) ...@@ -1143,6 +1144,7 @@ void scsi_init_command(struct scsi_device *dev, struct scsi_cmnd *cmd)
unsigned long jiffies_at_alloc; unsigned long jiffies_at_alloc;
int retries, to_clear; int retries, to_clear;
bool in_flight; bool in_flight;
int budget_token = cmd->budget_token;
if (!blk_rq_is_scsi(rq) && !(flags & SCMD_INITIALIZED)) { if (!blk_rq_is_scsi(rq) && !(flags & SCMD_INITIALIZED)) {
flags |= SCMD_INITIALIZED; flags |= SCMD_INITIALIZED;
...@@ -1171,6 +1173,7 @@ void scsi_init_command(struct scsi_device *dev, struct scsi_cmnd *cmd) ...@@ -1171,6 +1173,7 @@ void scsi_init_command(struct scsi_device *dev, struct scsi_cmnd *cmd)
cmd->retries = retries; cmd->retries = retries;
if (in_flight) if (in_flight)
__set_bit(SCMD_STATE_INFLIGHT, &cmd->state); __set_bit(SCMD_STATE_INFLIGHT, &cmd->state);
cmd->budget_token = budget_token;
} }
...@@ -1605,19 +1608,19 @@ static void scsi_mq_done(struct scsi_cmnd *cmd) ...@@ -1605,19 +1608,19 @@ static void scsi_mq_done(struct scsi_cmnd *cmd)
blk_mq_complete_request(cmd->request); blk_mq_complete_request(cmd->request);
} }
static void scsi_mq_put_budget(struct request_queue *q) static void scsi_mq_put_budget(struct request_queue *q, int budget_token)
{ {
struct scsi_device *sdev = q->queuedata; struct scsi_device *sdev = q->queuedata;
atomic_dec(&sdev->device_busy); atomic_dec(&sdev->device_busy);
} }
static bool scsi_mq_get_budget(struct request_queue *q) static int scsi_mq_get_budget(struct request_queue *q)
{ {
struct scsi_device *sdev = q->queuedata; struct scsi_device *sdev = q->queuedata;
if (scsi_dev_queue_ready(q, sdev)) if (scsi_dev_queue_ready(q, sdev))
return true; return 0;
atomic_inc(&sdev->restarts); atomic_inc(&sdev->restarts);
...@@ -1639,7 +1642,7 @@ static bool scsi_mq_get_budget(struct request_queue *q) ...@@ -1639,7 +1642,7 @@ static bool scsi_mq_get_budget(struct request_queue *q)
if (unlikely(atomic_read(&sdev->device_busy) == 0 && if (unlikely(atomic_read(&sdev->device_busy) == 0 &&
!scsi_device_blocked(sdev))) !scsi_device_blocked(sdev)))
blk_mq_delay_run_hw_queues(sdev->request_queue, SCSI_QUEUE_DELAY); blk_mq_delay_run_hw_queues(sdev->request_queue, SCSI_QUEUE_DELAY);
return false; return -1;
} }
static void scsi_mq_set_rq_budget_token(struct request *req, int token) static void scsi_mq_set_rq_budget_token(struct request *req, int token)
...@@ -1667,6 +1670,8 @@ static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx, ...@@ -1667,6 +1670,8 @@ static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
blk_status_t ret; blk_status_t ret;
int reason; int reason;
WARN_ON_ONCE(cmd->budget_token < 0);
/* /*
* If the device is not in running state we will reject some or all * If the device is not in running state we will reject some or all
* commands. * commands.
...@@ -1718,7 +1723,8 @@ static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx, ...@@ -1718,7 +1723,8 @@ static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
if (scsi_target(sdev)->can_queue > 0) if (scsi_target(sdev)->can_queue > 0)
atomic_dec(&scsi_target(sdev)->target_busy); atomic_dec(&scsi_target(sdev)->target_busy);
out_put_budget: out_put_budget:
scsi_mq_put_budget(q); scsi_mq_put_budget(q, cmd->budget_token);
cmd->budget_token = -1;
switch (ret) { switch (ret) {
case BLK_STS_OK: case BLK_STS_OK:
break; break;
......
...@@ -306,12 +306,12 @@ struct blk_mq_ops { ...@@ -306,12 +306,12 @@ struct blk_mq_ops {
* reserved budget. Also we have to handle failure case * reserved budget. Also we have to handle failure case
* of .get_budget for avoiding I/O deadlock. * of .get_budget for avoiding I/O deadlock.
*/ */
bool (*get_budget)(struct request_queue *); int (*get_budget)(struct request_queue *);
/** /**
* @put_budget: Release the reserved budget. * @put_budget: Release the reserved budget.
*/ */
void (*put_budget)(struct request_queue *); void (*put_budget)(struct request_queue *, int);
/* /*
* @set_rq_budget_toekn: store rq's budget token * @set_rq_budget_toekn: store rq's budget token
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment