Commit 840eda96, authored by James Smart, committed by Martin K. Petersen

scsi: lpfc: Fix erroneous cpu limit of 128 on I/O statistics

The CPU I/O statistics were capped by a hard-coded limit of 128. This limit
was effectively a maximum number of CPUs, not the actual CPU count nor the
actual CPU numbers, either of which can be larger than 128. This made the
stats misleading and, on systems with large CPU counts, wrong.

Fix the stats so that all CPUs can have a stats struct.  Fix the looping
such that it loops by hdwq, finds the CPUs that used the hdwq, sums their
stats, and then displays them.

Link: https://lore.kernel.org/r/20200322181304.37655-9-jsmart2021@gmail.com
Signed-off-by: James Smart <jsmart2021@gmail.com>
Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
parent 807e7353
......@@ -481,7 +481,7 @@ struct lpfc_vport {
struct dentry *debug_nvmestat;
struct dentry *debug_scsistat;
struct dentry *debug_nvmektime;
struct dentry *debug_cpucheck;
struct dentry *debug_hdwqstat;
struct dentry *vport_debugfs_root;
struct lpfc_debugfs_trc *disc_trc;
atomic_t disc_trc_cnt;
......@@ -1175,12 +1175,11 @@ struct lpfc_hba {
uint16_t sfp_warning;
#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
uint16_t cpucheck_on;
uint16_t hdwqstat_on;
#define LPFC_CHECK_OFF 0
#define LPFC_CHECK_NVME_IO 1
#define LPFC_CHECK_NVMET_RCV 2
#define LPFC_CHECK_NVMET_IO 4
#define LPFC_CHECK_SCSI_IO 8
#define LPFC_CHECK_NVMET_IO 2
#define LPFC_CHECK_SCSI_IO 4
uint16_t ktime_on;
uint64_t ktime_data_samples;
uint64_t ktime_status_samples;
......
This diff is collapsed.
......@@ -47,7 +47,6 @@
/* nvmestat output buffer size */
#define LPFC_NVMESTAT_SIZE 8192
#define LPFC_NVMEKTIME_SIZE 8192
#define LPFC_CPUCHECK_SIZE 8192
#define LPFC_NVMEIO_TRC_SIZE 8192
/* scsistat output buffer size */
......
......@@ -6951,6 +6951,17 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba)
rc = -ENOMEM;
goto out_free_hba_cpu_map;
}
#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
phba->sli4_hba.c_stat = alloc_percpu(struct lpfc_hdwq_stat);
if (!phba->sli4_hba.c_stat) {
lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
"3332 Failed allocating per cpu hdwq stats\n");
rc = -ENOMEM;
goto out_free_hba_eq_info;
}
#endif
/*
* Enable sr-iov virtual functions if supported and configured
* through the module parameter.
......@@ -6970,6 +6981,10 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba)
return 0;
#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
out_free_hba_eq_info:
free_percpu(phba->sli4_hba.eq_info);
#endif
out_free_hba_cpu_map:
kfree(phba->sli4_hba.cpu_map);
out_free_hba_eq_hdl:
......@@ -7008,6 +7023,9 @@ lpfc_sli4_driver_resource_unset(struct lpfc_hba *phba)
struct lpfc_fcf_conn_entry *conn_entry, *next_conn_entry;
free_percpu(phba->sli4_hba.eq_info);
#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
free_percpu(phba->sli4_hba.c_stat);
#endif
/* Free memory allocated for msi-x interrupt vector to CPU mapping */
kfree(phba->sli4_hba.cpu_map);
......@@ -10848,6 +10866,9 @@ lpfc_cpu_affinity_check(struct lpfc_hba *phba, int vectors)
#ifdef CONFIG_X86
struct cpuinfo_x86 *cpuinfo;
#endif
#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
struct lpfc_hdwq_stat *c_stat;
#endif
max_phys_id = 0;
min_phys_id = LPFC_VECTOR_MAP_EMPTY;
......@@ -11099,10 +11120,17 @@ lpfc_cpu_affinity_check(struct lpfc_hba *phba, int vectors)
idx = 0;
for_each_possible_cpu(cpu) {
cpup = &phba->sli4_hba.cpu_map[cpu];
#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
c_stat = per_cpu_ptr(phba->sli4_hba.c_stat, cpu);
c_stat->hdwq_no = cpup->hdwq;
#endif
if (cpup->hdwq != LPFC_VECTOR_MAP_EMPTY)
continue;
cpup->hdwq = idx++ % phba->cfg_hdw_queue;
#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
c_stat->hdwq_no = cpup->hdwq;
#endif
lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
"3340 Set Affinity: not present "
"CPU %d hdwq %d\n",
......
......@@ -1012,6 +1012,9 @@ lpfc_nvme_io_cmd_wqe_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeIn,
uint32_t code, status, idx;
uint16_t cid, sqhd, data;
uint32_t *ptr;
#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
int cpu;
#endif
/* Sanity check on return of outstanding command */
if (!lpfc_ncmd) {
......@@ -1184,19 +1187,15 @@ lpfc_nvme_io_cmd_wqe_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeIn,
phba->ktime_last_cmd = lpfc_ncmd->ts_data_nvme;
lpfc_nvme_ktime(phba, lpfc_ncmd);
}
if (unlikely(phba->cpucheck_on & LPFC_CHECK_NVME_IO)) {
uint32_t cpu;
idx = lpfc_ncmd->cur_iocbq.hba_wqidx;
if (unlikely(phba->hdwqstat_on & LPFC_CHECK_NVME_IO)) {
cpu = raw_smp_processor_id();
if (cpu < LPFC_CHECK_CPU_CNT) {
if (lpfc_ncmd->cpu != cpu)
lpfc_printf_vlog(vport,
KERN_INFO, LOG_NVME_IOERR,
"6701 CPU Check cmpl: "
"cpu %d expect %d\n",
cpu, lpfc_ncmd->cpu);
phba->sli4_hba.hdwq[idx].cpucheck_cmpl_io[cpu]++;
}
this_cpu_inc(phba->sli4_hba.c_stat->cmpl_io);
if (lpfc_ncmd->cpu != cpu)
lpfc_printf_vlog(vport,
KERN_INFO, LOG_NVME_IOERR,
"6701 CPU Check cmpl: "
"cpu %d expect %d\n",
cpu, lpfc_ncmd->cpu);
}
#endif
......@@ -1745,19 +1744,17 @@ lpfc_nvme_fcp_io_submit(struct nvme_fc_local_port *pnvme_lport,
if (lpfc_ncmd->ts_cmd_start)
lpfc_ncmd->ts_cmd_wqput = ktime_get_ns();
if (phba->cpucheck_on & LPFC_CHECK_NVME_IO) {
if (phba->hdwqstat_on & LPFC_CHECK_NVME_IO) {
cpu = raw_smp_processor_id();
if (cpu < LPFC_CHECK_CPU_CNT) {
lpfc_ncmd->cpu = cpu;
if (idx != cpu)
lpfc_printf_vlog(vport,
KERN_INFO, LOG_NVME_IOERR,
"6702 CPU Check cmd: "
"cpu %d wq %d\n",
lpfc_ncmd->cpu,
lpfc_queue_info->index);
phba->sli4_hba.hdwq[idx].cpucheck_xmt_io[cpu]++;
}
this_cpu_inc(phba->sli4_hba.c_stat->xmt_io);
lpfc_ncmd->cpu = cpu;
if (idx != cpu)
lpfc_printf_vlog(vport,
KERN_INFO, LOG_NVME_IOERR,
"6702 CPU Check cmd: "
"cpu %d wq %d\n",
lpfc_ncmd->cpu,
lpfc_queue_info->index);
}
#endif
return 0;
......
......@@ -707,7 +707,7 @@ lpfc_nvmet_xmt_fcp_op_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
struct lpfc_nvmet_rcv_ctx *ctxp;
uint32_t status, result, op, start_clean, logerr;
#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
uint32_t id;
int id;
#endif
ctxp = cmdwqe->context2;
......@@ -814,16 +814,14 @@ lpfc_nvmet_xmt_fcp_op_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
rsp->done(rsp);
}
#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
if (phba->cpucheck_on & LPFC_CHECK_NVMET_IO) {
if (phba->hdwqstat_on & LPFC_CHECK_NVMET_IO) {
id = raw_smp_processor_id();
if (id < LPFC_CHECK_CPU_CNT) {
if (ctxp->cpu != id)
lpfc_printf_log(phba, KERN_INFO, LOG_NVME_IOERR,
"6704 CPU Check cmdcmpl: "
"cpu %d expect %d\n",
id, ctxp->cpu);
phba->sli4_hba.hdwq[rsp->hwqid].cpucheck_cmpl_io[id]++;
}
this_cpu_inc(phba->sli4_hba.c_stat->cmpl_io);
if (ctxp->cpu != id)
lpfc_printf_log(phba, KERN_INFO, LOG_NVME_IOERR,
"6704 CPU Check cmdcmpl: "
"cpu %d expect %d\n",
id, ctxp->cpu);
}
#endif
}
......@@ -931,6 +929,9 @@ lpfc_nvmet_xmt_fcp_op(struct nvmet_fc_target_port *tgtport,
struct lpfc_sli_ring *pring;
unsigned long iflags;
int rc;
#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
int id;
#endif
if (phba->pport->load_flag & FC_UNLOADING) {
rc = -ENODEV;
......@@ -954,16 +955,14 @@ lpfc_nvmet_xmt_fcp_op(struct nvmet_fc_target_port *tgtport,
if (!ctxp->hdwq)
ctxp->hdwq = &phba->sli4_hba.hdwq[rsp->hwqid];
if (phba->cpucheck_on & LPFC_CHECK_NVMET_IO) {
int id = raw_smp_processor_id();
if (id < LPFC_CHECK_CPU_CNT) {
if (rsp->hwqid != id)
lpfc_printf_log(phba, KERN_INFO, LOG_NVME_IOERR,
"6705 CPU Check OP: "
"cpu %d expect %d\n",
id, rsp->hwqid);
phba->sli4_hba.hdwq[rsp->hwqid].cpucheck_xmt_io[id]++;
}
if (phba->hdwqstat_on & LPFC_CHECK_NVMET_IO) {
id = raw_smp_processor_id();
this_cpu_inc(phba->sli4_hba.c_stat->xmt_io);
if (rsp->hwqid != id)
lpfc_printf_log(phba, KERN_INFO, LOG_NVME_IOERR,
"6705 CPU Check OP: "
"cpu %d expect %d\n",
id, rsp->hwqid);
ctxp->cpu = id; /* Setup cpu for cmpl check */
}
#endif
......@@ -2270,15 +2269,13 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba,
size = nvmebuf->bytes_recv;
#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
if (phba->cpucheck_on & LPFC_CHECK_NVMET_RCV) {
if (current_cpu < LPFC_CHECK_CPU_CNT) {
if (idx != current_cpu)
lpfc_printf_log(phba, KERN_INFO, LOG_NVME_IOERR,
"6703 CPU Check rcv: "
"cpu %d expect %d\n",
current_cpu, idx);
phba->sli4_hba.hdwq[idx].cpucheck_rcv_io[current_cpu]++;
}
if (phba->hdwqstat_on & LPFC_CHECK_NVMET_IO) {
this_cpu_inc(phba->sli4_hba.c_stat->rcv_io);
if (idx != current_cpu)
lpfc_printf_log(phba, KERN_INFO, LOG_NVME_IOERR,
"6703 CPU Check rcv: "
"cpu %d expect %d\n",
current_cpu, idx);
}
#endif
......
......@@ -3805,9 +3805,6 @@ lpfc_scsi_cmd_iocb_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pIocbIn,
struct Scsi_Host *shost;
int idx;
uint32_t logit = LOG_FCP;
#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
int cpu;
#endif
/* Guard against abort handler being called at same time */
spin_lock(&lpfc_cmd->buf_lock);
......@@ -3826,11 +3823,8 @@ lpfc_scsi_cmd_iocb_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pIocbIn,
phba->sli4_hba.hdwq[idx].scsi_cstat.io_cmpls++;
#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
if (unlikely(phba->cpucheck_on & LPFC_CHECK_SCSI_IO)) {
cpu = raw_smp_processor_id();
if (cpu < LPFC_CHECK_CPU_CNT && phba->sli4_hba.hdwq)
phba->sli4_hba.hdwq[idx].cpucheck_cmpl_io[cpu]++;
}
if (unlikely(phba->hdwqstat_on & LPFC_CHECK_SCSI_IO))
this_cpu_inc(phba->sli4_hba.c_stat->cmpl_io);
#endif
shost = cmd->device->host;
......@@ -4503,9 +4497,6 @@ lpfc_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *cmnd)
struct lpfc_io_buf *lpfc_cmd;
struct fc_rport *rport = starget_to_rport(scsi_target(cmnd->device));
int err, idx;
#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
int cpu;
#endif
rdata = lpfc_rport_data_from_scsi_device(cmnd->device);
......@@ -4626,14 +4617,8 @@ lpfc_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *cmnd)
lpfc_scsi_prep_cmnd(vport, lpfc_cmd, ndlp);
#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
if (unlikely(phba->cpucheck_on & LPFC_CHECK_SCSI_IO)) {
cpu = raw_smp_processor_id();
if (cpu < LPFC_CHECK_CPU_CNT) {
struct lpfc_sli4_hdw_queue *hdwq =
&phba->sli4_hba.hdwq[lpfc_cmd->hdwq_no];
hdwq->cpucheck_xmt_io[cpu]++;
}
}
if (unlikely(phba->hdwqstat_on & LPFC_CHECK_SCSI_IO))
this_cpu_inc(phba->sli4_hba.c_stat->xmt_io);
#endif
err = lpfc_sli_issue_iocb(phba, LPFC_FCP_RING,
&lpfc_cmd->cur_iocbq, SLI_IOCB_RET_IOCB);
......
......@@ -697,13 +697,6 @@ struct lpfc_sli4_hdw_queue {
struct lpfc_lock_stat lock_conflict;
#endif
#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
#define LPFC_CHECK_CPU_CNT 128
uint32_t cpucheck_rcv_io[LPFC_CHECK_CPU_CNT];
uint32_t cpucheck_xmt_io[LPFC_CHECK_CPU_CNT];
uint32_t cpucheck_cmpl_io[LPFC_CHECK_CPU_CNT];
#endif
/* Per HDWQ pool resources */
struct list_head sgl_list;
struct list_head cmd_rsp_buf_list;
......@@ -740,6 +733,15 @@ struct lpfc_sli4_hdw_queue {
#define lpfc_qp_spin_lock(lock, qp, lstat) spin_lock(lock)
#endif
#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
/*
 * Per-CPU I/O counters used by the debugfs hdwq statistics.
 *
 * One instance exists per possible CPU (allocated with alloc_percpu() into
 * lpfc_sli4_hba.c_stat). The I/O paths bump the counters of the CPU they are
 * running on via this_cpu_inc() when the matching LPFC_CHECK_* bit is set in
 * phba->hdwqstat_on.
 */
struct lpfc_hdwq_stat {
u32 hdwq_no; /* hdwq mapped to this CPU; copied from cpu_map[cpu].hdwq */
u32 rcv_io; /* I/Os received on this CPU (NVMET unsolicited FCP path) */
u32 xmt_io; /* I/Os submitted from this CPU (NVME, NVMET and SCSI paths) */
u32 cmpl_io; /* I/O completions handled on this CPU */
};
#endif
struct lpfc_sli4_hba {
void __iomem *conf_regs_memmap_p; /* Kernel memory mapped address for
* config space registers
......@@ -921,6 +923,9 @@ struct lpfc_sli4_hba {
struct cpumask numa_mask;
uint16_t curr_disp_cpu;
struct lpfc_eq_intr_info __percpu *eq_info;
#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
struct lpfc_hdwq_stat __percpu *c_stat;
#endif
uint32_t conf_trunk;
#define lpfc_conf_trunk_port0_WORD conf_trunk
#define lpfc_conf_trunk_port0_SHIFT 0
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment