Commit 8a2278ce authored by Stefan Haberland's avatar Stefan Haberland Committed by Jens Axboe

s390/dasd: fix hanging device after request requeue

The DASD device driver has a function to requeue requests to the
blocklayer.
This function is used in various cases when basic settings for the device
have to be changed like High Performance Ficon related parameters or copy
pair settings.

The functions iterates over the device->ccw_queue and also removes the
requests from the block->ccw_queue.
In case the device is started on an alias device instead of the base
device it might be removed from the block->ccw_queue without having it
canceled properly before. This might lead to a hanging device since the
request is no longer on a queue and can not be handled properly.

Fix by iterating over the block->ccw_queue instead of the
device->ccw_queue. This will take care of all blocklayer related requests
and handle them on all associated DASD devices.
Signed-off-by: default avatarStefan Haberland <sth@linux.ibm.com>
Reviewed-by: default avatarJan Hoeppner <hoeppner@linux.ibm.com>
Link: https://lore.kernel.org/r/20230721193647.3889634-4-sth@linux.ibm.comSigned-off-by: default avatarJens Axboe <axboe@kernel.dk>
parent acea28a6
...@@ -2943,41 +2943,32 @@ static void _dasd_wake_block_flush_cb(struct dasd_ccw_req *cqr, void *data) ...@@ -2943,41 +2943,32 @@ static void _dasd_wake_block_flush_cb(struct dasd_ccw_req *cqr, void *data)
* Requeue a request back to the block request queue * Requeue a request back to the block request queue
* only works for block requests * only works for block requests
*/ */
static int _dasd_requeue_request(struct dasd_ccw_req *cqr) static void _dasd_requeue_request(struct dasd_ccw_req *cqr)
{ {
struct dasd_block *block = cqr->block;
struct request *req; struct request *req;
if (!block)
return -EINVAL;
/* /*
* If the request is an ERP request there is nothing to requeue. * If the request is an ERP request there is nothing to requeue.
* This will be done with the remaining original request. * This will be done with the remaining original request.
*/ */
if (cqr->refers) if (cqr->refers)
return 0; return;
spin_lock_irq(&cqr->dq->lock); spin_lock_irq(&cqr->dq->lock);
req = (struct request *) cqr->callback_data; req = (struct request *) cqr->callback_data;
blk_mq_requeue_request(req, true); blk_mq_requeue_request(req, true);
spin_unlock_irq(&cqr->dq->lock); spin_unlock_irq(&cqr->dq->lock);
return 0; return;
} }
/* static int _dasd_requests_to_flushqueue(struct dasd_block *block,
* Go through all request on the dasd_block request queue, cancel them struct list_head *flush_queue)
* on the respective dasd_device, and return them to the generic
* block layer.
*/
static int dasd_flush_block_queue(struct dasd_block *block)
{ {
struct dasd_ccw_req *cqr, *n; struct dasd_ccw_req *cqr, *n;
int rc, i;
struct list_head flush_queue;
unsigned long flags; unsigned long flags;
int rc, i;
INIT_LIST_HEAD(&flush_queue); spin_lock_irqsave(&block->queue_lock, flags);
spin_lock_bh(&block->queue_lock);
rc = 0; rc = 0;
restart: restart:
list_for_each_entry_safe(cqr, n, &block->ccw_queue, blocklist) { list_for_each_entry_safe(cqr, n, &block->ccw_queue, blocklist) {
...@@ -2992,13 +2983,32 @@ static int dasd_flush_block_queue(struct dasd_block *block) ...@@ -2992,13 +2983,32 @@ static int dasd_flush_block_queue(struct dasd_block *block)
* is returned from the dasd_device layer. * is returned from the dasd_device layer.
*/ */
cqr->callback = _dasd_wake_block_flush_cb; cqr->callback = _dasd_wake_block_flush_cb;
for (i = 0; cqr != NULL; cqr = cqr->refers, i++) for (i = 0; cqr; cqr = cqr->refers, i++)
list_move_tail(&cqr->blocklist, &flush_queue); list_move_tail(&cqr->blocklist, flush_queue);
if (i > 1) if (i > 1)
/* moved more than one request - need to restart */ /* moved more than one request - need to restart */
goto restart; goto restart;
} }
spin_unlock_bh(&block->queue_lock); spin_unlock_irqrestore(&block->queue_lock, flags);
return rc;
}
/*
* Go through all request on the dasd_block request queue, cancel them
* on the respective dasd_device, and return them to the generic
* block layer.
*/
static int dasd_flush_block_queue(struct dasd_block *block)
{
struct dasd_ccw_req *cqr, *n;
struct list_head flush_queue;
unsigned long flags;
int rc;
INIT_LIST_HEAD(&flush_queue);
rc = _dasd_requests_to_flushqueue(block, &flush_queue);
/* Now call the callback function of flushed requests */ /* Now call the callback function of flushed requests */
restart_cb: restart_cb:
list_for_each_entry_safe(cqr, n, &flush_queue, blocklist) { list_for_each_entry_safe(cqr, n, &flush_queue, blocklist) {
...@@ -3881,75 +3891,36 @@ EXPORT_SYMBOL_GPL(dasd_generic_space_avail); ...@@ -3881,75 +3891,36 @@ EXPORT_SYMBOL_GPL(dasd_generic_space_avail);
*/ */
int dasd_generic_requeue_all_requests(struct dasd_device *device) int dasd_generic_requeue_all_requests(struct dasd_device *device)
{ {
struct dasd_block *block = device->block;
struct list_head requeue_queue; struct list_head requeue_queue;
struct dasd_ccw_req *cqr, *n; struct dasd_ccw_req *cqr, *n;
struct dasd_ccw_req *refers;
int rc; int rc;
INIT_LIST_HEAD(&requeue_queue); if (!block)
spin_lock_irq(get_ccwdev_lock(device->cdev)); return 0;
rc = 0;
list_for_each_entry_safe(cqr, n, &device->ccw_queue, devlist) {
/* Check status and move request to flush_queue */
if (cqr->status == DASD_CQR_IN_IO) {
rc = device->discipline->term_IO(cqr);
if (rc) {
/* unable to terminate requeust */
dev_err(&device->cdev->dev,
"Unable to terminate request %p "
"on suspend\n", cqr);
spin_unlock_irq(get_ccwdev_lock(device->cdev));
dasd_put_device(device);
return rc;
}
}
list_move_tail(&cqr->devlist, &requeue_queue);
}
spin_unlock_irq(get_ccwdev_lock(device->cdev));
list_for_each_entry_safe(cqr, n, &requeue_queue, devlist) {
wait_event(dasd_flush_wq,
(cqr->status != DASD_CQR_CLEAR_PENDING));
/* INIT_LIST_HEAD(&requeue_queue);
* requeue requests to blocklayer will only work rc = _dasd_requests_to_flushqueue(block, &requeue_queue);
* for block device requests
*/
if (_dasd_requeue_request(cqr))
continue;
/* remove requests from device and block queue */ /* Now call the callback function of flushed requests */
list_del_init(&cqr->devlist); restart_cb:
while (cqr->refers != NULL) { list_for_each_entry_safe(cqr, n, &requeue_queue, blocklist) {
refers = cqr->refers; wait_event(dasd_flush_wq, (cqr->status < DASD_CQR_QUEUED));
/* remove the request from the block queue */ /* Process finished ERP request. */
list_del(&cqr->blocklist); if (cqr->refers) {
/* free the finished erp request */ spin_lock_bh(&block->queue_lock);
dasd_free_erp_request(cqr, cqr->memdev); __dasd_process_erp(block->base, cqr);
cqr = refers; spin_unlock_bh(&block->queue_lock);
/* restart list_for_xx loop since dasd_process_erp
* might remove multiple elements
*/
goto restart_cb;
} }
_dasd_requeue_request(cqr);
/*
* _dasd_requeue_request already checked for a valid
* blockdevice, no need to check again
* all erp requests (cqr->refers) have a cqr->block
* pointer copy from the original cqr
*/
list_del_init(&cqr->blocklist); list_del_init(&cqr->blocklist);
cqr->block->base->discipline->free_cp( cqr->block->base->discipline->free_cp(
cqr, (struct request *) cqr->callback_data); cqr, (struct request *) cqr->callback_data);
} }
/*
* if requests remain then they are internal request
* and go back to the device queue
*/
if (!list_empty(&requeue_queue)) {
/* move freeze_queue to start of the ccw_queue */
spin_lock_irq(get_ccwdev_lock(device->cdev));
list_splice_tail(&requeue_queue, &device->ccw_queue);
spin_unlock_irq(get_ccwdev_lock(device->cdev));
}
dasd_schedule_device_bh(device); dasd_schedule_device_bh(device);
return rc; return rc;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment