scsi: lpfc: Fix MRQ > 1 context list handling

Various oops including cpu LOCKUPs were seen. For asynchronously received ius where the driver must assign exchange resources, the resources were on a single get (free) list and put list (finished, waiting to be put on get list). As all cpus are sharing the lists, an interrupt for a receive frame may have to wait for all the other cpus to place their done work onto the put list before it can acquire the lock to pull from the list. Fix by breaking the resource lists into per-cpu lists or at least more than 1 list with cpu's sharing the lists). A cpu would allocate from the free list for its own cpu, and put its done work on the its own put list - avoiding the contention. As cpu load may vary, when empty, a cpu may grab from another cpu, thereby changing resource distribution. But searching for a resource only occurs on 1 or a few cpus until a single resource can be allocated. if the condition reoccurs, it starts looking at a different cpu. Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com> Signed-off-by: James Smart <james.smart@broadcom.com> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>

scsi: lpfc: Fix MRQ > 1 context list handling
Various oops including cpu LOCKUPs were seen. For asynchronously received ius where the driver must assign exchange resources, the resources were on a single get (free) list and put list (finished, waiting to be put on get list). As all cpus are sharing the lists, an interrupt for a receive frame may have to wait for all the other cpus to place their done work onto the put list before it can acquire the lock to pull from the list. Fix by breaking the resource lists into per-cpu lists or at least more than 1 list with cpu's sharing the lists). A cpu would allocate from the free list for its own cpu, and put its done work on the its own put list - avoiding the contention. As cpu load may vary, when empty, a cpu may grab from another cpu, thereby changing resource distribution. But searching for a resource only occurs on 1 or a few cpus until a single resource can be allocated. if the condition reoccurs, it starts looking at a different cpu. Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com> Signed-off-by: James Smart <james.smart@broadcom.com> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
66d7ce93 · Dick Kennedy · Martin K. Petersen · e3e2863d · 66d7ce93 · 66d7ce93
Commit 66d7ce93 authored 7 years ago by Dick Kennedy Committed by Martin K. Petersen 7 years ago
8 changed files
--- a/drivers/scsi/lpfc/lpfc_attr.c
+++ b/drivers/scsi/lpfc/lpfc_attr.c
@@ -245,13 +245,10 @@ lpfc_nvme_info_show(struct device *dev, struct device_attribute *attr,
 				atomic_read(&tgtp->xmt_abort_rsp),
 				atomic_read(&tgtp->xmt_abort_rsp_error));

-		spin_lock(&phba->sli4_hba.nvmet_ctx_get_lock);
-		spin_lock(&phba->sli4_hba.nvmet_ctx_put_lock);
-		tot = phba->sli4_hba.nvmet_xri_cnt -
-			(phba->sli4_hba.nvmet_ctx_get_cnt +
-			phba->sli4_hba.nvmet_ctx_put_cnt);
-		spin_unlock(&phba->sli4_hba.nvmet_ctx_put_lock);
-		spin_unlock(&phba->sli4_hba.nvmet_ctx_get_lock);
+		/* Calculate outstanding IOs */
+		tot = atomic_read(&tgtp->rcv_fcp_cmd_drop);
+		tot += atomic_read(&tgtp->xmt_fcp_release);
+		tot = atomic_read(&tgtp->rcv_fcp_cmd_in) - tot;

 		len += snprintf(buf + len, PAGE_SIZE - len,
 				"IO_CTX: %08x  WAIT: cur %08x tot %08x\n"

--- a/drivers/scsi/lpfc/lpfc_crtn.h
+++ b/drivers/scsi/lpfc/lpfc_crtn.h
@@ -556,9 +556,8 @@ int lpfc_nvmet_update_targetport(struct lpfc_hba *phba);
 void lpfc_nvmet_destroy_targetport(struct lpfc_hba *phba);
 void lpfc_nvmet_unsol_ls_event(struct lpfc_hba *phba,
 			struct lpfc_sli_ring *pring, struct lpfc_iocbq *piocb);
-void lpfc_nvmet_unsol_fcp_event(struct lpfc_hba *phba,
-			struct lpfc_sli_ring *pring,
-			struct rqb_dmabuf *nvmebuf, uint64_t isr_ts);
+void lpfc_nvmet_unsol_fcp_event(struct lpfc_hba *phba, uint32_t idx,
+				struct rqb_dmabuf *nvmebuf, uint64_t isr_ts);
 void lpfc_nvme_mod_param_dep(struct lpfc_hba *phba);
 void lpfc_nvme_abort_fcreq_cmpl(struct lpfc_hba *phba,
 				struct lpfc_iocbq *cmdiocb,

--- a/drivers/scsi/lpfc/lpfc_debugfs.c
+++ b/drivers/scsi/lpfc/lpfc_debugfs.c
@@ -848,13 +848,10 @@ lpfc_debugfs_nvmestat_data(struct lpfc_vport *vport, char *buf, int size)
 			spin_unlock(&phba->sli4_hba.abts_nvme_buf_list_lock);
 		}

-		spin_lock(&phba->sli4_hba.nvmet_ctx_get_lock);
-		spin_lock(&phba->sli4_hba.nvmet_ctx_put_lock);
-		tot = phba->sli4_hba.nvmet_xri_cnt -
-			(phba->sli4_hba.nvmet_ctx_get_cnt +
-			phba->sli4_hba.nvmet_ctx_put_cnt);
-		spin_unlock(&phba->sli4_hba.nvmet_ctx_put_lock);
-		spin_unlock(&phba->sli4_hba.nvmet_ctx_get_lock);
+		/* Calculate outstanding IOs */
+		tot = atomic_read(&tgtp->rcv_fcp_cmd_drop);
+		tot += atomic_read(&tgtp->xmt_fcp_release);
+		tot = atomic_read(&tgtp->rcv_fcp_cmd_in) - tot;

 		len += snprintf(buf + len, size - len,
 				"IO_CTX: %08x  WAIT: cur %08x tot %08x\n"

--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -1253,6 +1253,7 @@ lpfc_hb_timeout_handler(struct lpfc_hba *phba)
 	unsigned long time_elapsed;
 	uint32_t tick_cqe, max_cqe, val;
 	uint64_t tot, data1, data2, data3;
+	struct lpfc_nvmet_tgtport *tgtp;
 	struct lpfc_register reg_data;
 	void __iomem *eqdreg = phba->sli4_hba.u.if_type2.EQDregaddr;

@@ -1281,13 +1282,11 @@ lpfc_hb_timeout_handler(struct lpfc_hba *phba)
 		/* Check outstanding IO count */
 		if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
 			if (phba->nvmet_support) {
-				spin_lock(&phba->sli4_hba.nvmet_ctx_get_lock);
-				spin_lock(&phba->sli4_hba.nvmet_ctx_put_lock);
-				tot = phba->sli4_hba.nvmet_xri_cnt -
-					(phba->sli4_hba.nvmet_ctx_get_cnt +
-					phba->sli4_hba.nvmet_ctx_put_cnt);
-				spin_unlock(&phba->sli4_hba.nvmet_ctx_put_lock);
-				spin_unlock(&phba->sli4_hba.nvmet_ctx_get_lock);
+				tgtp = phba->targetport->private;
+				/* Calculate outstanding IOs */
+				tot = atomic_read(&tgtp->rcv_fcp_cmd_drop);
+				tot += atomic_read(&tgtp->xmt_fcp_release);
+				tot = atomic_read(&tgtp->rcv_fcp_cmd_in) - tot;
 			} else {
 				tot = atomic_read(&phba->fc4NvmeIoCmpls);
 				data1 = atomic_read(
@@ -5937,8 +5936,6 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba)
 		spin_lock_init(&phba->sli4_hba.abts_nvme_buf_list_lock);
 		INIT_LIST_HEAD(&phba->sli4_hba.lpfc_abts_nvme_buf_list);
 		INIT_LIST_HEAD(&phba->sli4_hba.lpfc_abts_nvmet_ctx_list);
-		INIT_LIST_HEAD(&phba->sli4_hba.lpfc_nvmet_ctx_get_list);
-		INIT_LIST_HEAD(&phba->sli4_hba.lpfc_nvmet_ctx_put_list);
 		INIT_LIST_HEAD(&phba->sli4_hba.lpfc_nvmet_io_wait_list);

 		/* Fast-path XRI aborted CQ Event work queue list */
@@ -5947,8 +5944,6 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba)

 	/* This abort list used by worker thread */
 	spin_lock_init(&phba->sli4_hba.sgl_list_lock);
-	spin_lock_init(&phba->sli4_hba.nvmet_ctx_get_lock);
-	spin_lock_init(&phba->sli4_hba.nvmet_ctx_put_lock);
 	spin_lock_init(&phba->sli4_hba.nvmet_io_wait_lock);

 	/*

--- a/drivers/scsi/lpfc/lpfc_nvmet.c
+++ b/drivers/scsi/lpfc/lpfc_nvmet.c
--- a/drivers/scsi/lpfc/lpfc_nvmet.h
+++ b/drivers/scsi/lpfc/lpfc_nvmet.h
@@ -73,6 +73,19 @@ struct lpfc_nvmet_tgtport {
 	atomic_t xmt_abort_rsp_error;
 };

+struct lpfc_nvmet_ctx_info {
+	struct list_head nvmet_ctx_list;
+	spinlock_t	nvmet_ctx_list_lock; /* lock per CPU */
+	struct lpfc_nvmet_ctx_info *nvmet_ctx_next_cpu;
+	struct lpfc_nvmet_ctx_info *nvmet_ctx_start_cpu;
+	uint16_t	nvmet_ctx_list_cnt;
+	char pad[16];  /* pad to a cache-line */
+};
+
+/* This retrieves the context info associated with the specified cpu / mrq */
+#define lpfc_get_ctx_list(phba, cpu, mrq)  \
+	(phba->sli4_hba.nvmet_ctx_info + ((cpu * phba->cfg_nvmet_mrq) + mrq))
+
 struct lpfc_nvmet_rcv_ctx {
 	union {
 		struct nvmefc_tgt_ls_req ls_req;
@@ -91,6 +104,7 @@ struct lpfc_nvmet_rcv_ctx {
 	uint16_t size;
 	uint16_t entry_cnt;
 	uint16_t cpu;
+	uint16_t idx;
 	uint16_t state;
 	/* States */
 #define LPFC_NVMET_STE_LS_RCV		1

--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -13291,7 +13291,7 @@ lpfc_sli4_nvmet_handle_rcqe(struct lpfc_hba *phba, struct lpfc_queue *cq,
 		if (fc_hdr->fh_type == FC_TYPE_FCP) {
 			dma_buf->bytes_recv = bf_get(lpfc_rcqe_length,  rcqe);
 			lpfc_nvmet_unsol_fcp_event(
-				phba, phba->sli4_hba.els_wq->pring, dma_buf,
+				phba, idx, dma_buf,
 				cq->assoc_qp->isr_timestamp);
 			return false;
 		}

--- a/drivers/scsi/lpfc/lpfc_sli4.h
+++ b/drivers/scsi/lpfc/lpfc_sli4.h
@@ -622,8 +622,6 @@ struct lpfc_sli4_hba {
 	uint16_t scsi_xri_start;
 	uint16_t els_xri_cnt;
 	uint16_t nvmet_xri_cnt;
-	uint16_t nvmet_ctx_get_cnt;
-	uint16_t nvmet_ctx_put_cnt;
 	uint16_t nvmet_io_wait_cnt;
 	uint16_t nvmet_io_wait_total;
 	struct list_head lpfc_els_sgl_list;
@@ -632,9 +630,8 @@ struct lpfc_sli4_hba {
 	struct list_head lpfc_abts_nvmet_ctx_list;
 	struct list_head lpfc_abts_scsi_buf_list;
 	struct list_head lpfc_abts_nvme_buf_list;
-	struct list_head lpfc_nvmet_ctx_get_list;
-	struct list_head lpfc_nvmet_ctx_put_list;
 	struct list_head lpfc_nvmet_io_wait_list;
+	struct lpfc_nvmet_ctx_info *nvmet_ctx_info;
 	struct lpfc_sglq **lpfc_sglq_active_list;
 	struct list_head lpfc_rpi_hdr_list;
 	unsigned long *rpi_bmask;
@@ -665,8 +662,6 @@ struct lpfc_sli4_hba {
 	spinlock_t abts_nvme_buf_list_lock; /* list of aborted SCSI IOs */
 	spinlock_t abts_scsi_buf_list_lock; /* list of aborted SCSI IOs */
 	spinlock_t sgl_list_lock; /* list of aborted els IOs */
-	spinlock_t nvmet_ctx_get_lock; /* list of avail XRI contexts */
-	spinlock_t nvmet_ctx_put_lock; /* list of avail XRI contexts */
 	spinlock_t nvmet_io_wait_lock; /* IOs waiting for ctx resources */
 	uint32_t physical_port;