Commit 9937a5e2 authored by Jens Axboe

scsi: remove performance regression due to async queue run

Commit c21e6beb removed our queue request_fn re-enter
protection and, to be safe, defaulted to always running the
queues from kblockd. This was a known potential slowdown,
but it was assumed to be acceptable.

Unfortunately this is causing big performance regressions for
some workloads, so we need to improve this logic. Looking into
the details of the re-entry, the real issue is on requeue of
requests.

Requeueing a request after seeing a BUSY condition from the device
ends up re-running the queue, producing traces like this:

scsi_request_fn()
        scsi_dispatch_cmd()
                scsi_queue_insert()
                        __scsi_queue_insert()
                                scsi_run_queue()
                                        scsi_request_fn()
                                                ...

potentially triggering the recursion we want to avoid. So
special-case the requeue re-run of the queue, and improve it by
offloading the run of both the local queue and the starved-device
queues from a single workqueue callback. This is a lot better than
potentially kicking off a workqueue run for each device seen.

This also fixes the issue of the local device going into recursion,
since the above-mentioned commit never moved that queue run out
of line.
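
To show the shape of the fix outside kernel context, here is a
minimal userspace sketch of the same pattern. All names in it
(struct toy_device, run_queue(), requeue_work_fn(), the
requeue_scheduled flag) are illustrative inventions standing in for
sdev->requeue_work, scsi_requeue_run_queue() and
kblockd_schedule_work(); it is an analogue, not the kernel code.
Instead of re-entering the run-queue function when a command must be
requeued, the device records one pending work item and a deferred
callback re-runs the queue later:

    #include <stdbool.h>
    #include <stdio.h>

    struct toy_device {
            int pending_cmds;
            bool busy_once;          /* simulate one transient BUSY condition */
            bool requeue_scheduled;  /* stands in for the per-device work_struct */
    };

    static void run_queue(struct toy_device *dev);

    /* Deferred callback: the analogue of scsi_requeue_run_queue(). */
    static void requeue_work_fn(struct toy_device *dev)
    {
            dev->requeue_scheduled = false;
            run_queue(dev);
    }

    static void run_queue(struct toy_device *dev)
    {
            while (dev->pending_cmds > 0) {
                    if (dev->busy_once) {
                            dev->busy_once = false;
                            /* Old behaviour: call run_queue() here -> recursion.
                             * New behaviour: schedule deferred work instead. */
                            dev->requeue_scheduled = true;
                            return;
                    }
                    dev->pending_cmds--;
                    printf("dispatched, %d left\n", dev->pending_cmds);
            }
    }

    int main(void)
    {
            struct toy_device dev = { .pending_cmds = 3, .busy_once = true };

            run_queue(&dev);
            /* "kblockd" stand-in: run any scheduled requeue work. */
            while (dev.requeue_scheduled)
                    requeue_work_fn(&dev);
            return 0;
    }

One deferred callback per device keeps the stack depth bounded and,
as the commit notes, lets a single run cover both the local queue and
the starved queues instead of scheduling a work item per device.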
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
parent 70087dc3
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -74,8 +74,6 @@ struct kmem_cache *scsi_sdb_cache;
  */
 #define SCSI_QUEUE_DELAY 3
 
-static void scsi_run_queue(struct request_queue *q);
-
 /*
  * Function: scsi_unprep_request()
  *
@@ -161,7 +159,7 @@ static int __scsi_queue_insert(struct scsi_cmnd *cmd, int reason, int unbusy)
         blk_requeue_request(q, cmd->request);
         spin_unlock_irqrestore(q->queue_lock, flags);
 
-        scsi_run_queue(q);
+        kblockd_schedule_work(q, &device->requeue_work);
 
         return 0;
 }
@@ -433,7 +431,11 @@ static void scsi_run_queue(struct request_queue *q)
                         continue;
                 }
 
-                blk_run_queue_async(sdev->request_queue);
+                spin_unlock(shost->host_lock);
+                spin_lock(sdev->request_queue->queue_lock);
+                __blk_run_queue(sdev->request_queue);
+                spin_unlock(sdev->request_queue->queue_lock);
+                spin_lock(shost->host_lock);
         }
         /* put any unprocessed entries back */
         list_splice(&starved_list, &shost->starved_list);
@@ -442,6 +444,16 @@ static void scsi_run_queue(struct request_queue *q)
         blk_run_queue(q);
 }
 
+void scsi_requeue_run_queue(struct work_struct *work)
+{
+        struct scsi_device *sdev;
+        struct request_queue *q;
+
+        sdev = container_of(work, struct scsi_device, requeue_work);
+        q = sdev->request_queue;
+        scsi_run_queue(q);
+}
+
 /*
  * Function: scsi_requeue_command()
  *
diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -242,6 +242,7 @@ static struct scsi_device *scsi_alloc_sdev(struct scsi_target *starget,
         int display_failure_msg = 1, ret;
         struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);
         extern void scsi_evt_thread(struct work_struct *work);
+        extern void scsi_requeue_run_queue(struct work_struct *work);
 
         sdev = kzalloc(sizeof(*sdev) + shost->transportt->device_size,
                        GFP_ATOMIC);
@@ -264,6 +265,7 @@ static struct scsi_device *scsi_alloc_sdev(struct scsi_target *starget,
         INIT_LIST_HEAD(&sdev->event_list);
         spin_lock_init(&sdev->list_lock);
         INIT_WORK(&sdev->event_work, scsi_evt_thread);
+        INIT_WORK(&sdev->requeue_work, scsi_requeue_run_queue);
 
         sdev->sdev_gendev.parent = get_device(&starget->dev);
         sdev->sdev_target = starget;
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -169,6 +169,7 @@ struct scsi_device {
                                 sdev_dev;
 
         struct execute_work ew; /* used to get process context on put */
+        struct work_struct requeue_work;
 
         struct scsi_dh_data *scsi_dh_data;
         enum scsi_device_state sdev_state;