Commit 318aaf34 authored by Jason Yan's avatar Jason Yan Committed by Martin K. Petersen

scsi: libsas: defer ata device eh commands to libata

When ata device doing EH, some commands still attached with tasks are
not passed to libata when abort failed or recover failed, so libata did
not handle these commands. After these commands done, sas task is freed,
but ata qc is not freed. This will cause ata qc leak and trigger a
warning like below:

WARNING: CPU: 0 PID: 28512 at drivers/ata/libata-eh.c:4037
ata_eh_finish+0xb4/0xcc
CPU: 0 PID: 28512 Comm: kworker/u32:2 Tainted: G     W  OE 4.14.0#1
......
Call trace:
[<ffff0000088b7bd0>] ata_eh_finish+0xb4/0xcc
[<ffff0000088b8420>] ata_do_eh+0xc4/0xd8
[<ffff0000088b8478>] ata_std_error_handler+0x44/0x8c
[<ffff0000088b8068>] ata_scsi_port_error_handler+0x480/0x694
[<ffff000008875fc4>] async_sas_ata_eh+0x4c/0x80
[<ffff0000080f6be8>] async_run_entry_fn+0x4c/0x170
[<ffff0000080ebd70>] process_one_work+0x144/0x390
[<ffff0000080ec100>] worker_thread+0x144/0x418
[<ffff0000080f2c98>] kthread+0x10c/0x138
[<ffff0000080855dc>] ret_from_fork+0x10/0x18

If ata qc leaked too many, ata tag allocation will fail and io blocked
for ever.

As suggested by Dan Williams, defer ata device commands to libata and
merge sas_eh_finish_cmd() with sas_eh_defer_cmd(). libata will handle
ata qcs correctly after this.
Signed-off-by: default avatarJason Yan <yanaijie@huawei.com>
CC: Xiaofei Tan <tanxiaofei@huawei.com>
CC: John Garry <john.garry@huawei.com>
CC: Dan Williams <dan.j.williams@intel.com>
Reviewed-by: default avatarDan Williams <dan.j.williams@intel.com>
Signed-off-by: default avatarMartin K. Petersen <martin.petersen@oracle.com>
parent 20bd1d02
...@@ -223,6 +223,7 @@ int sas_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd) ...@@ -223,6 +223,7 @@ int sas_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
static void sas_eh_finish_cmd(struct scsi_cmnd *cmd) static void sas_eh_finish_cmd(struct scsi_cmnd *cmd)
{ {
struct sas_ha_struct *sas_ha = SHOST_TO_SAS_HA(cmd->device->host); struct sas_ha_struct *sas_ha = SHOST_TO_SAS_HA(cmd->device->host);
struct domain_device *dev = cmd_to_domain_dev(cmd);
struct sas_task *task = TO_SAS_TASK(cmd); struct sas_task *task = TO_SAS_TASK(cmd);
/* At this point, we only get called following an actual abort /* At this point, we only get called following an actual abort
...@@ -231,6 +232,14 @@ static void sas_eh_finish_cmd(struct scsi_cmnd *cmd) ...@@ -231,6 +232,14 @@ static void sas_eh_finish_cmd(struct scsi_cmnd *cmd)
*/ */
sas_end_task(cmd, task); sas_end_task(cmd, task);
if (dev_is_sata(dev)) {
/* defer commands to libata so that libata EH can
* handle ata qcs correctly
*/
list_move_tail(&cmd->eh_entry, &sas_ha->eh_ata_q);
return;
}
/* now finish the command and move it on to the error /* now finish the command and move it on to the error
* handler done list, this also takes it off the * handler done list, this also takes it off the
* error handler pending list. * error handler pending list.
...@@ -238,22 +247,6 @@ static void sas_eh_finish_cmd(struct scsi_cmnd *cmd) ...@@ -238,22 +247,6 @@ static void sas_eh_finish_cmd(struct scsi_cmnd *cmd)
scsi_eh_finish_cmd(cmd, &sas_ha->eh_done_q); scsi_eh_finish_cmd(cmd, &sas_ha->eh_done_q);
} }
static void sas_eh_defer_cmd(struct scsi_cmnd *cmd)
{
struct domain_device *dev = cmd_to_domain_dev(cmd);
struct sas_ha_struct *ha = dev->port->ha;
struct sas_task *task = TO_SAS_TASK(cmd);
if (!dev_is_sata(dev)) {
sas_eh_finish_cmd(cmd);
return;
}
/* report the timeout to libata */
sas_end_task(cmd, task);
list_move_tail(&cmd->eh_entry, &ha->eh_ata_q);
}
static void sas_scsi_clear_queue_lu(struct list_head *error_q, struct scsi_cmnd *my_cmd) static void sas_scsi_clear_queue_lu(struct list_head *error_q, struct scsi_cmnd *my_cmd)
{ {
struct scsi_cmnd *cmd, *n; struct scsi_cmnd *cmd, *n;
...@@ -261,7 +254,7 @@ static void sas_scsi_clear_queue_lu(struct list_head *error_q, struct scsi_cmnd ...@@ -261,7 +254,7 @@ static void sas_scsi_clear_queue_lu(struct list_head *error_q, struct scsi_cmnd
list_for_each_entry_safe(cmd, n, error_q, eh_entry) { list_for_each_entry_safe(cmd, n, error_q, eh_entry) {
if (cmd->device->sdev_target == my_cmd->device->sdev_target && if (cmd->device->sdev_target == my_cmd->device->sdev_target &&
cmd->device->lun == my_cmd->device->lun) cmd->device->lun == my_cmd->device->lun)
sas_eh_defer_cmd(cmd); sas_eh_finish_cmd(cmd);
} }
} }
...@@ -618,12 +611,12 @@ static void sas_eh_handle_sas_errors(struct Scsi_Host *shost, struct list_head * ...@@ -618,12 +611,12 @@ static void sas_eh_handle_sas_errors(struct Scsi_Host *shost, struct list_head *
case TASK_IS_DONE: case TASK_IS_DONE:
SAS_DPRINTK("%s: task 0x%p is done\n", __func__, SAS_DPRINTK("%s: task 0x%p is done\n", __func__,
task); task);
sas_eh_defer_cmd(cmd); sas_eh_finish_cmd(cmd);
continue; continue;
case TASK_IS_ABORTED: case TASK_IS_ABORTED:
SAS_DPRINTK("%s: task 0x%p is aborted\n", SAS_DPRINTK("%s: task 0x%p is aborted\n",
__func__, task); __func__, task);
sas_eh_defer_cmd(cmd); sas_eh_finish_cmd(cmd);
continue; continue;
case TASK_IS_AT_LU: case TASK_IS_AT_LU:
SAS_DPRINTK("task 0x%p is at LU: lu recover\n", task); SAS_DPRINTK("task 0x%p is at LU: lu recover\n", task);
...@@ -634,7 +627,7 @@ static void sas_eh_handle_sas_errors(struct Scsi_Host *shost, struct list_head * ...@@ -634,7 +627,7 @@ static void sas_eh_handle_sas_errors(struct Scsi_Host *shost, struct list_head *
"recovered\n", "recovered\n",
SAS_ADDR(task->dev), SAS_ADDR(task->dev),
cmd->device->lun); cmd->device->lun);
sas_eh_defer_cmd(cmd); sas_eh_finish_cmd(cmd);
sas_scsi_clear_queue_lu(work_q, cmd); sas_scsi_clear_queue_lu(work_q, cmd);
goto Again; goto Again;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment