Commit b64845a7 authored by Sreekanth Reddy, committed by Martin K. Petersen

scsi: mpi3mr: Detect async reset that occurred in firmware

Detect an asynchronous reset that occurred in the firmware by polling the
reset history bit of the IOC status register. If that bit is set, the
driver waits for the controller to become ready and then re-initializes
the controller.

Also reduce the time the driver waits for the controller to acknowledge
a reset action after issuing a specific reset action to the controller.
The wait time is reduced from 510 seconds to 30 seconds. If the
controller does not acknowledge the reset action within that time
interval, the driver marks the controller as unrecoverable instead of
retrying two more times prior to giving up.

Link: https://lore.kernel.org/r/20211220141159.16117-17-sreekanth.reddy@broadcom.com
Signed-off-by: Sreekanth Reddy <sreekanth.reddy@broadcom.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
parent c0b00a93
......@@ -110,6 +110,7 @@ extern int prot_mask;
#define MPI3MR_TSUPDATE_INTERVAL 900
#define MPI3MR_DEFAULT_SHUTDOWN_TIME 120
#define MPI3MR_RAID_ERRREC_RESET_TIMEOUT 180
#define MPI3MR_RESET_ACK_TIMEOUT 30
#define MPI3MR_WATCHDOG_INTERVAL 1000 /* in milli seconds */
......@@ -210,7 +211,8 @@ enum mpi3mr_reset_reason {
MPI3MR_RESET_FROM_GETPKGVER_TIMEOUT = 21,
MPI3MR_RESET_FROM_PELABORT_TIMEOUT = 22,
MPI3MR_RESET_FROM_SYSFS = 23,
MPI3MR_RESET_FROM_SYSFS_TIMEOUT = 24
MPI3MR_RESET_FROM_SYSFS_TIMEOUT = 24,
MPI3MR_RESET_FROM_FIRMWARE = 27,
};
/**
......@@ -678,9 +680,9 @@ struct scmd_priv {
* @removepend_bitmap: Remove pending bitmap
* @delayed_rmhs_list: Delayed device removal list
* @ts_update_counter: Timestamp update counter
* @fault_dbg: Fault debug flag
* @reset_in_progress: Reset in progress flag
* @unrecoverable: Controller unrecoverable flag
* @prev_reset_result: Result of previous reset
* @reset_mutex: Controller reset mutex
* @reset_waitq: Controller reset wait queue
* @diagsave_timeout: Diagnostic information save timeout
......@@ -804,9 +806,9 @@ struct mpi3mr_ioc {
struct list_head delayed_rmhs_list;
u32 ts_update_counter;
u8 fault_dbg;
u8 reset_in_progress;
u8 unrecoverable;
int prev_reset_result;
struct mutex reset_mutex;
wait_queue_head_t reset_waitq;
......@@ -891,8 +893,6 @@ void mpi3mr_stop_watchdog(struct mpi3mr_ioc *mrioc);
int mpi3mr_soft_reset_handler(struct mpi3mr_ioc *mrioc,
u32 reset_reason, u8 snapdump);
int mpi3mr_diagfault_reset_handler(struct mpi3mr_ioc *mrioc,
u32 reset_reason);
void mpi3mr_ioc_disable_intr(struct mpi3mr_ioc *mrioc);
void mpi3mr_ioc_enable_intr(struct mpi3mr_ioc *mrioc);
......@@ -907,5 +907,7 @@ void mpi3mr_invalidate_devhandles(struct mpi3mr_ioc *mrioc);
void mpi3mr_rfresh_tgtdevs(struct mpi3mr_ioc *mrioc);
void mpi3mr_flush_delayed_rmhs_list(struct mpi3mr_ioc *mrioc);
void mpi3mr_check_rh_fault_ioc(struct mpi3mr_ioc *mrioc, u32 reason_code);
void mpi3mr_print_fault_info(struct mpi3mr_ioc *mrioc);
void mpi3mr_check_rh_fault_ioc(struct mpi3mr_ioc *mrioc, u32 reason_code);
#endif /*MPI3MR_H_INCLUDED*/
This diff is collapsed.
......@@ -3073,32 +3073,42 @@ static int mpi3mr_scan_finished(struct Scsi_Host *shost,
{
struct mpi3mr_ioc *mrioc = shost_priv(shost);
u32 pe_timeout = MPI3MR_PORTENABLE_TIMEOUT;
u32 ioc_status = readl(&mrioc->sysif_regs->ioc_status);
if (time >= (pe_timeout * HZ)) {
if ((ioc_status & MPI3_SYSIF_IOC_STATUS_RESET_HISTORY) ||
(ioc_status & MPI3_SYSIF_IOC_STATUS_FAULT)) {
ioc_err(mrioc, "port enable failed due to fault or reset\n");
mpi3mr_print_fault_info(mrioc);
mrioc->scan_failed = MPI3_IOCSTATUS_INTERNAL_ERROR;
mrioc->scan_started = 0;
mrioc->init_cmds.is_waiting = 0;
mrioc->init_cmds.callback = NULL;
mrioc->init_cmds.state = MPI3MR_CMD_NOTUSED;
ioc_err(mrioc, "%s :port enable request timed out\n", __func__);
mrioc->is_driver_loading = 0;
mpi3mr_soft_reset_handler(mrioc,
MPI3MR_RESET_FROM_PE_TIMEOUT, 1);
}
if (mrioc->scan_failed) {
ioc_err(mrioc,
"%s :port enable failed with (ioc_status=0x%08x)\n",
__func__, mrioc->scan_failed);
mrioc->is_driver_loading = 0;
mrioc->stop_drv_processing = 1;
return 1;
if (time >= (pe_timeout * HZ)) {
ioc_err(mrioc, "port enable failed due to time out\n");
mpi3mr_check_rh_fault_ioc(mrioc,
MPI3MR_RESET_FROM_PE_TIMEOUT);
mrioc->scan_failed = MPI3_IOCSTATUS_INTERNAL_ERROR;
mrioc->scan_started = 0;
mrioc->init_cmds.is_waiting = 0;
mrioc->init_cmds.callback = NULL;
mrioc->init_cmds.state = MPI3MR_CMD_NOTUSED;
}
if (mrioc->scan_started)
return 0;
ioc_info(mrioc, "%s :port enable: SUCCESS\n", __func__);
if (mrioc->scan_failed) {
ioc_err(mrioc,
"port enable failed with status=0x%04x\n",
mrioc->scan_failed);
} else
ioc_info(mrioc, "port enable is successfully completed\n");
mpi3mr_start_watchdog(mrioc);
mrioc->is_driver_loading = 0;
return 1;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment