Commit c490850a authored by James Smart, committed by Martin K. Petersen

scsi: lpfc: Adapt partitioned XRI lists to efficient sharing

The XRI get/put lists were partitioned per hardware queue. However, the
adapter rarely had sufficient resources to give each queue a large
allocation. As such, it became common for a cpu to run out of XRIs and
return a BUSY condition, asking the upper io stack to retry, even though
other cpus were idle and not using their resources.

Create as efficient a scheme as possible to move resources to the cpus that
need them. Each cpu maintains a small private pool which it allocates from
for io. There is a watermark that the cpu attempts to keep in the private
pool. The private pool, when empty, pulls from the cpu's global pool. When
the cpu's global pool is empty, it will pull from other cpus' global
pools. As there are many cpu global pools (one per cpu or hardware queue)
and as each cpu selects which cpu to pull from at different rates and at
different times, a randomizing effect is created that minimizes the number
of cpus that contend with each other when they steal XRIs from another
cpu's global pool. The resulting allocation order is sketched below.
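A minimal user-space sketch of that allocation order (names, pool sizes and
the fixed victim rotation are illustrative assumptions, not the driver's;
the real entry point is lpfc_get_io_buf(), and each pool is guarded by its
own spinlock, elided here):

```c
#include <stdio.h>

#define NCPU 4

struct xri_pool { int count; };	/* stand-in for a free-XRI list */

struct cpu_pools {
	struct xri_pool pvt;	/* small per-cpu private pool */
	struct xri_pool pbl;	/* per-cpu public ("global") pool */
};

static struct cpu_pools cpus[NCPU];

/* Allocation order: private pool, then the local public pool, then
 * steal from other cpus' public pools round-robin. */
static int xri_alloc(int me)
{
	if (cpus[me].pvt.count > 0) {
		cpus[me].pvt.count--;		/* fast path */
		return 1;
	}
	if (cpus[me].pbl.count > 0) {
		cpus[me].pbl.count--;		/* local public pool */
		return 1;
	}
	for (int i = 1; i < NCPU; i++) {	/* steal from a peer */
		int victim = (me + i) % NCPU;
		if (cpus[victim].pbl.count > 0) {
			cpus[victim].pbl.count--;
			return 1;
		}
	}
	return 0;	/* everything empty: caller reports BUSY */
}

int main(void)
{
	cpus[2].pbl.count = 8;	/* only cpu 2 holds free XRIs */
	printf("cpu 0 alloc: %d\n", xri_alloc(0));	/* succeeds by stealing */
	return 0;
}
```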

On io completion, a cpu pushes the XRI back onto its private pool. A
watermark level is maintained for the private pool such that, when it is
exceeded, XRIs are moved to the cpu's global pool so that other cpus may
allocate them.

On NVME, as heartbeat commands are critical to get placed on the wire, a
single expedite pool is maintained. When a heartbeat is to be sent, it
allocates an XRI from the expedite pool rather than the normal cpu
private/global pools. On any io completion, if the expedite pool has been
depleted, it is replenished before the XRI is placed back on the cpu
private pool (see the completion-side sketch below).
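Continuing the sketch above, the completion-side rules of the two preceding
paragraphs: replenish the expedite pool first, then push onto the private
pool, spilling anything over the high watermark into the public pool
(EPD_TARGET and PVT_HWM are assumed values; the driver's real routine is
lpfc_release_io_buf()):

```c
/* Continuation of the allocation sketch: the completion side. */
static struct xri_pool epd_pool;	/* single expedite pool */

#define EPD_TARGET 4	/* assumed expedite pool depth */
#define PVT_HWM    8	/* assumed private-pool high watermark */

static void xri_release(int me)
{
	if (epd_pool.count < EPD_TARGET) {
		epd_pool.count++;	/* replenish expedite pool first */
		return;
	}
	cpus[me].pvt.count++;		/* back onto the private pool */
	if (cpus[me].pvt.count > PVT_HWM) {
		cpus[me].pvt.count--;	/* over the watermark: spill... */
		cpus[me].pbl.count++;	/* ...into the public pool */
	}
}
```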

Statistics are added to aid in understanding the XRI levels on each cpu and
their behavior.
Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <jsmart2021@gmail.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
parent ace44e48
@@ -235,8 +235,6 @@ typedef struct lpfc_vpd {
} sli3Feat;
} lpfc_vpd_t;
struct lpfc_scsi_buf;
/*
* lpfc stat counters
@@ -597,6 +595,13 @@ struct lpfc_mbox_ext_buf_ctx {
struct list_head ext_dmabuf_list;
};
struct lpfc_epd_pool {
/* Expedite pool */
struct list_head list;
u32 count;
spinlock_t lock; /* lock for expedite pool */
};
struct lpfc_ras_fwlog {
uint8_t *fwlog_buff;
uint32_t fw_buffcount; /* Buffer size posted to FW */
@@ -618,19 +623,19 @@ struct lpfc_ras_fwlog {
struct lpfc_hba {
/* SCSI interface function jump table entries */
struct lpfc_scsi_buf * (*lpfc_get_scsi_buf)
struct lpfc_io_buf * (*lpfc_get_scsi_buf)
(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp,
struct scsi_cmnd *cmnd);
int (*lpfc_scsi_prep_dma_buf)
(struct lpfc_hba *, struct lpfc_scsi_buf *);
(struct lpfc_hba *, struct lpfc_io_buf *);
void (*lpfc_scsi_unprep_dma_buf)
(struct lpfc_hba *, struct lpfc_scsi_buf *);
(struct lpfc_hba *, struct lpfc_io_buf *);
void (*lpfc_release_scsi_buf)
(struct lpfc_hba *, struct lpfc_scsi_buf *);
(struct lpfc_hba *, struct lpfc_io_buf *);
void (*lpfc_rampdown_queue_depth)
(struct lpfc_hba *);
void (*lpfc_scsi_prep_cmnd)
(struct lpfc_vport *, struct lpfc_scsi_buf *,
(struct lpfc_vport *, struct lpfc_io_buf *,
struct lpfc_nodelist *);
/* IOCB interface function jump table entries */
@@ -673,9 +678,12 @@ struct lpfc_hba {
(struct lpfc_hba *);
int (*lpfc_bg_scsi_prep_dma_buf)
(struct lpfc_hba *, struct lpfc_scsi_buf *);
(struct lpfc_hba *, struct lpfc_io_buf *);
/* Add new entries here */
/* expedite pool */
struct lpfc_epd_pool epd_pool;
/* SLI4 specific HBA data structure */
struct lpfc_sli4_hba sli4_hba;
@@ -789,6 +797,7 @@ struct lpfc_hba {
/* HBA Config Parameters */
uint32_t cfg_ack0;
uint32_t cfg_xri_rebalancing;
uint32_t cfg_enable_npiv;
uint32_t cfg_enable_rrq;
uint32_t cfg_topology;
@@ -1014,6 +1023,7 @@ struct lpfc_hba {
#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
struct dentry *hba_debugfs_root;
atomic_t debugfs_vport_count;
struct dentry *debug_multixri_pools;
struct dentry *debug_hbqinfo;
struct dentry *debug_dumpHostSlim;
struct dentry *debug_dumpHBASlim;
......
@@ -5266,6 +5266,12 @@ static DEVICE_ATTR_RW(lpfc_max_scsicmpl_time);
*/
LPFC_ATTR_R(ack0, 0, 0, 1, "Enable ACK0 support");
/*
# lpfc_xri_rebalancing: enable or disable XRI rebalancing feature
# range is [0,1]. Default value is 1.
*/
LPFC_ATTR_R(xri_rebalancing, 1, 0, 1, "Enable/Disable XRI rebalancing");
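Since LPFC_ATTR_R also defines a read-only module parameter, the feature can
be turned off at load time, e.g. modprobe lpfc lpfc_xri_rebalancing=0 (the
parameter name is assumed from lpfc's usual lpfc_<attr> naming convention).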
/*
* lpfc_io_sched: Determine scheduling algorithm for issuing FCP cmds
* range is [0,1]. Default value is 0.
@@ -5723,6 +5729,7 @@ struct device_attribute *lpfc_hba_attrs[] = {
&dev_attr_lpfc_use_adisc,
&dev_attr_lpfc_first_burst_size,
&dev_attr_lpfc_ack0,
&dev_attr_lpfc_xri_rebalancing,
&dev_attr_lpfc_topology,
&dev_attr_lpfc_scan_down,
&dev_attr_lpfc_link_speed,
@@ -6788,6 +6795,7 @@ lpfc_get_cfgparam(struct lpfc_hba *phba)
lpfc_multi_ring_rctl_init(phba, lpfc_multi_ring_rctl);
lpfc_multi_ring_type_init(phba, lpfc_multi_ring_type);
lpfc_ack0_init(phba, lpfc_ack0);
lpfc_xri_rebalancing_init(phba, lpfc_xri_rebalancing);
lpfc_topology_init(phba, lpfc_topology);
lpfc_link_speed_init(phba, lpfc_link_speed);
lpfc_poll_tmo_init(phba, lpfc_poll_tmo);
@@ -6846,6 +6854,7 @@ lpfc_get_cfgparam(struct lpfc_hba *phba)
phba->nvmet_support = 0;
phba->cfg_enable_fc4_type = LPFC_ENABLE_FCP;
phba->cfg_enable_bbcr = 0;
phba->cfg_xri_rebalancing = 0;
} else {
/* We MUST have FCP support */
if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP))
......
@@ -521,6 +521,7 @@ int lpfc_sli4_io_sgl_update(struct lpfc_hba *phba);
int lpfc_sli4_post_io_sgl_list(struct lpfc_hba *phba,
struct list_head *blist, int xricnt);
int lpfc_new_io_buf(struct lpfc_hba *phba, int num_to_alloc);
void lpfc_io_free(struct lpfc_hba *phba);
void lpfc_free_sgl_list(struct lpfc_hba *, struct list_head *);
uint32_t lpfc_sli_port_speed_get(struct lpfc_hba *);
int lpfc_sli4_request_firmware_update(struct lpfc_hba *, uint8_t);
@@ -573,6 +574,21 @@ void lpfc_nvme_mod_param_dep(struct lpfc_hba *phba);
void lpfc_nvme_abort_fcreq_cmpl(struct lpfc_hba *phba,
struct lpfc_iocbq *cmdiocb,
struct lpfc_wcqe_complete *abts_cmpl);
void lpfc_create_multixri_pools(struct lpfc_hba *phba);
void lpfc_create_destroy_pools(struct lpfc_hba *phba);
void lpfc_move_xri_pvt_to_pbl(struct lpfc_hba *phba, u32 hwqid);
void lpfc_move_xri_pbl_to_pvt(struct lpfc_hba *phba, u32 hwqid, u32 cnt);
void lpfc_adjust_high_watermark(struct lpfc_hba *phba, u32 hwqid);
void lpfc_keep_pvt_pool_above_lowwm(struct lpfc_hba *phba, u32 hwqid);
void lpfc_adjust_pvt_pool_count(struct lpfc_hba *phba, u32 hwqid);
#ifdef LPFC_MXP_STAT
void lpfc_snapshot_mxp(struct lpfc_hba *, u32);
#endif
struct lpfc_io_buf *lpfc_get_io_buf(struct lpfc_hba *phba,
struct lpfc_nodelist *ndlp, u32 hwqid,
int);
void lpfc_release_io_buf(struct lpfc_hba *phba, struct lpfc_io_buf *ncmd,
struct lpfc_sli4_hdw_queue *qp);
void lpfc_nvme_cmd_template(void);
void lpfc_nvmet_cmd_template(void);
extern int lpfc_enable_nvmet_cnt;
......
@@ -378,6 +378,126 @@ lpfc_debugfs_hbqinfo_data(struct lpfc_hba *phba, char *buf, int size)
return len;
}
/**
* lpfc_debugfs_multixripools_data - Display multi-XRI pools information
* @phba: The HBA to gather host buffer info from.
* @buf: The buffer to dump log into.
* @size: The maximum amount of data to process.
*
* Description:
* This routine displays current multi-XRI pools information including XRI
* count in public, private and txcmplq. It also displays current high and
* low watermark.
*
* Return Value:
* This routine returns the number of bytes that were dumped into @buf and will
* not exceed @size.
**/
static int
lpfc_debugfs_multixripools_data(struct lpfc_hba *phba, char *buf, int size)
{
u32 i;
u32 hwq_count;
struct lpfc_sli4_hdw_queue *qp;
struct lpfc_multixri_pool *multixri_pool;
struct lpfc_pvt_pool *pvt_pool;
struct lpfc_pbl_pool *pbl_pool;
u32 txcmplq_cnt;
char tmp[LPFC_DEBUG_OUT_LINE_SZ] = {0};
/*
* Pbl: Current number of free XRIs in public pool
* Pvt: Current number of free XRIs in private pool
* Busy: Current number of outstanding XRIs
* HWM: Current high watermark
* pvt_empty: Incremented by 1 when IO submission fails (no xri)
* pbl_empty: Incremented by 1 when all pbl_pool are empty during
* IO submission
*/
scnprintf(tmp, sizeof(tmp),
"HWQ: Pbl Pvt Busy HWM | pvt_empty pbl_empty ");
if (strlcat(buf, tmp, size) >= size)
return strnlen(buf, size);
#ifdef LPFC_MXP_STAT
/*
* MAXH: Max high watermark seen so far
* above_lmt: Incremented by 1 if xri_owned > xri_limit during
* IO submission
* below_lmt: Incremented by 1 if xri_owned <= xri_limit during
* IO submission
* locPbl_hit: Incremented by 1 if successfully get a batch of XRI from
* local pbl_pool
* othPbl_hit: Incremented by 1 if successfully get a batch of XRI from
* other pbl_pool
*/
scnprintf(tmp, sizeof(tmp),
"MAXH above_lmt below_lmt locPbl_hit othPbl_hit");
if (strlcat(buf, tmp, size) >= size)
return strnlen(buf, size);
/*
* sPbl: snapshot of Pbl 15 sec after stat gets cleared
* sPvt: snapshot of Pvt 15 sec after stat gets cleared
* sBusy: snapshot of Busy 15 sec after stat gets cleared
*/
scnprintf(tmp, sizeof(tmp),
" | sPbl sPvt sBusy");
if (strlcat(buf, tmp, size) >= size)
return strnlen(buf, size);
#endif
scnprintf(tmp, sizeof(tmp), "\n");
if (strlcat(buf, tmp, size) >= size)
return strnlen(buf, size);
hwq_count = phba->cfg_hdw_queue;
for (i = 0; i < hwq_count; i++) {
qp = &phba->sli4_hba.hdwq[i];
multixri_pool = qp->p_multixri_pool;
if (!multixri_pool)
continue;
pbl_pool = &multixri_pool->pbl_pool;
pvt_pool = &multixri_pool->pvt_pool;
txcmplq_cnt = qp->fcp_wq->pring->txcmplq_cnt;
if (qp->nvme_wq)
txcmplq_cnt += qp->nvme_wq->pring->txcmplq_cnt;
scnprintf(tmp, sizeof(tmp),
"%03d: %4d %4d %4d %4d | %10d %10d ",
i, pbl_pool->count, pvt_pool->count,
txcmplq_cnt, pvt_pool->high_watermark,
qp->empty_io_bufs, multixri_pool->pbl_empty_count);
if (strlcat(buf, tmp, size) >= size)
break;
#ifdef LPFC_MXP_STAT
scnprintf(tmp, sizeof(tmp),
"%4d %10d %10d %10d %10d",
multixri_pool->stat_max_hwm,
multixri_pool->above_limit_count,
multixri_pool->below_limit_count,
multixri_pool->local_pbl_hit_count,
multixri_pool->other_pbl_hit_count);
if (strlcat(buf, tmp, size) >= size)
break;
scnprintf(tmp, sizeof(tmp),
" | %4d %4d %5d",
multixri_pool->stat_pbl_count,
multixri_pool->stat_pvt_count,
multixri_pool->stat_busy_count);
if (strlcat(buf, tmp, size) >= size)
break;
#endif
scnprintf(tmp, sizeof(tmp), "\n");
if (strlcat(buf, tmp, size) >= size)
break;
}
return strnlen(buf, size);
}
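Given the format strings above, the file renders one row per hardware queue;
with the stat build disabled, a row would look like (illustrative values):
000:   24   14    2   16 |          0          0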
static int lpfc_debugfs_last_hdwq;
/**
@@ -1751,6 +1871,53 @@ lpfc_debugfs_hbqinfo_open(struct inode *inode, struct file *file)
return rc;
}
/**
* lpfc_debugfs_multixripools_open - Open the multixripool debugfs buffer
* @inode: The inode pointer that contains a hba pointer.
* @file: The file pointer to attach the log output.
*
* Description:
* This routine is the entry point for the debugfs open file operation. It gets
* the hba from the i_private field in @inode, allocates the necessary buffer,
* fills it with the multi-XRI pool information for this hba, and then returns
* a pointer to that buffer in the private_data field in @file.
*
* Returns:
* This function returns zero if successful. On error it will return a negative
* error value.
**/
static int
lpfc_debugfs_multixripools_open(struct inode *inode, struct file *file)
{
struct lpfc_hba *phba = inode->i_private;
struct lpfc_debug *debug;
int rc = -ENOMEM;
debug = kmalloc(sizeof(*debug), GFP_KERNEL);
if (!debug)
goto out;
/* Allocate the fixed-size output buffer */
debug->buffer = kzalloc(LPFC_DUMP_MULTIXRIPOOL_SIZE, GFP_KERNEL);
if (!debug->buffer) {
kfree(debug);
goto out;
}
if (phba->cfg_xri_rebalancing)
debug->len = lpfc_debugfs_multixripools_data(
phba, debug->buffer, LPFC_DUMP_MULTIXRIPOOL_SIZE);
else
debug->len = 0;
debug->i_private = inode->i_private;
file->private_data = debug;
rc = 0;
out:
return rc;
}
/**
* lpfc_debugfs_hdwqinfo_open - Open the hdwqinfo debugfs buffer
* @inode: The inode pointer that contains a vport pointer.
@@ -2182,6 +2349,75 @@ lpfc_debugfs_dumpDataDif_release(struct inode *inode, struct file *file)
return 0;
}
/**
* lpfc_debugfs_multixripools_write - Clear multi-XRI pools statistics
* @file: The file pointer to read from.
* @buf: The buffer to copy the user data from.
* @nbytes: The number of bytes to get.
* @ppos: The position in the file to start writing from.
*
* Description:
* This routine clears multi-XRI pools statistics when buf contains "clear".
*
* Return Value:
* It returns @nbytes as passed in from debugfs user space when successful.
* In case of error conditions, it returns the proper error code back to user
* space.
**/
static ssize_t
lpfc_debugfs_multixripools_write(struct file *file, const char __user *buf,
size_t nbytes, loff_t *ppos)
{
struct lpfc_debug *debug = file->private_data;
struct lpfc_hba *phba = (struct lpfc_hba *)debug->i_private;
char mybuf[64];
char *pbuf;
u32 i;
u32 hwq_count;
struct lpfc_sli4_hdw_queue *qp;
struct lpfc_multixri_pool *multixri_pool;
if (nbytes >= sizeof(mybuf))
nbytes = sizeof(mybuf) - 1;
/* Protect copy from user */
if (!access_ok(buf, nbytes))
return -EFAULT;
memset(mybuf, 0, sizeof(mybuf));
if (copy_from_user(mybuf, buf, nbytes))
return -EFAULT;
pbuf = &mybuf[0];
if ((strncmp(pbuf, "clear", strlen("clear"))) == 0) {
hwq_count = phba->cfg_hdw_queue;
for (i = 0; i < hwq_count; i++) {
qp = &phba->sli4_hba.hdwq[i];
multixri_pool = qp->p_multixri_pool;
if (!multixri_pool)
continue;
qp->empty_io_bufs = 0;
multixri_pool->pbl_empty_count = 0;
#ifdef LPFC_MXP_STAT
multixri_pool->above_limit_count = 0;
multixri_pool->below_limit_count = 0;
multixri_pool->stat_max_hwm = 0;
multixri_pool->local_pbl_hit_count = 0;
multixri_pool->other_pbl_hit_count = 0;
multixri_pool->stat_pbl_count = 0;
multixri_pool->stat_pvt_count = 0;
multixri_pool->stat_busy_count = 0;
multixri_pool->stat_snapshot_taken = 0;
#endif
}
return strlen(pbuf);
}
return -EINVAL;
}
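In practice the counters are reset by writing the keyword to the file, e.g.
echo clear > /sys/kernel/debug/lpfc/fn0/multixripools (the fn0 directory
name is assumed from lpfc's usual per-function debugfs layout); any other
input returns -EINVAL.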
static int
lpfc_debugfs_nvmestat_open(struct inode *inode, struct file *file)
@@ -5044,6 +5280,16 @@ static const struct file_operations lpfc_debugfs_op_nodelist = {
.release = lpfc_debugfs_release,
};
#undef lpfc_debugfs_op_multixripools
static const struct file_operations lpfc_debugfs_op_multixripools = {
.owner = THIS_MODULE,
.open = lpfc_debugfs_multixripools_open,
.llseek = lpfc_debugfs_lseek,
.read = lpfc_debugfs_read,
.write = lpfc_debugfs_multixripools_write,
.release = lpfc_debugfs_release,
};
#undef lpfc_debugfs_op_hbqinfo
static const struct file_operations lpfc_debugfs_op_hbqinfo = {
.owner = THIS_MODULE,
@@ -5490,6 +5736,19 @@ lpfc_debugfs_initialize(struct lpfc_vport *vport)
atomic_inc(&lpfc_debugfs_hba_count);
atomic_set(&phba->debugfs_vport_count, 0);
/* Multi-XRI pools */
snprintf(name, sizeof(name), "multixripools");
phba->debug_multixri_pools =
debugfs_create_file(name, S_IFREG | 0644,
phba->hba_debugfs_root,
phba,
&lpfc_debugfs_op_multixripools);
if (!phba->debug_multixri_pools) {
lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT,
"0527 Cannot create debugfs multixripools\n");
goto debug_failed;
}
/* Setup hbqinfo */
snprintf(name, sizeof(name), "hbqinfo");
phba->debug_hbqinfo =
@@ -5906,6 +6165,9 @@ lpfc_debugfs_terminate(struct lpfc_vport *vport)
if (atomic_read(&phba->debugfs_vport_count) == 0) {
debugfs_remove(phba->debug_multixri_pools); /* multixripools*/
phba->debug_multixri_pools = NULL;
debugfs_remove(phba->debug_hbqinfo); /* hbqinfo */
phba->debug_hbqinfo = NULL;
......
@@ -287,6 +287,9 @@ struct lpfc_idiag {
#endif
/* multixripool output buffer size */
#define LPFC_DUMP_MULTIXRIPOOL_SIZE 8192
/* hdwqinfo output buffer size */
#define LPFC_HDWQINFO_SIZE 8192
......
[diff for this file collapsed]
@@ -56,12 +56,12 @@
/* NVME initiator-based functions */
static struct lpfc_nvme_buf *
static struct lpfc_io_buf *
lpfc_get_nvme_buf(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp,
int idx, int expedite);
static void
lpfc_release_nvme_buf(struct lpfc_hba *, struct lpfc_nvme_buf *);
lpfc_release_nvme_buf(struct lpfc_hba *, struct lpfc_io_buf *);
static struct nvme_fc_port_template lpfc_nvme_template;
@@ -760,7 +760,7 @@ lpfc_nvme_ls_abort(struct nvme_fc_local_port *pnvme_lport,
/* Fix up the existing sgls for NVME IO. */
static inline void
lpfc_nvme_adj_fcp_sgls(struct lpfc_vport *vport,
struct lpfc_nvme_buf *lpfc_ncmd,
struct lpfc_io_buf *lpfc_ncmd,
struct nvmefc_fcp_req *nCmd)
{
struct lpfc_hba *phba = vport->phba;
@@ -857,7 +857,7 @@ lpfc_nvme_adj_fcp_sgls(struct lpfc_vport *vport,
#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
static void
lpfc_nvme_ktime(struct lpfc_hba *phba,
struct lpfc_nvme_buf *lpfc_ncmd)
struct lpfc_io_buf *lpfc_ncmd)
{
uint64_t seg1, seg2, seg3, seg4;
uint64_t segsum;
@@ -955,8 +955,8 @@ static void
lpfc_nvme_io_cmd_wqe_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeIn,
struct lpfc_wcqe_complete *wcqe)
{
struct lpfc_nvme_buf *lpfc_ncmd =
(struct lpfc_nvme_buf *)pwqeIn->context1;
struct lpfc_io_buf *lpfc_ncmd =
(struct lpfc_io_buf *)pwqeIn->context1;
struct lpfc_vport *vport = pwqeIn->vport;
struct nvmefc_fcp_req *nCmd;
struct nvme_fc_ersp_iu *ep;
@@ -1181,7 +1181,7 @@ lpfc_nvme_io_cmd_wqe_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeIn,
**/
static int
lpfc_nvme_prep_io_cmd(struct lpfc_vport *vport,
struct lpfc_nvme_buf *lpfc_ncmd,
struct lpfc_io_buf *lpfc_ncmd,
struct lpfc_nodelist *pnode,
struct lpfc_fc4_ctrl_stat *cstat)
{
@@ -1287,7 +1287,7 @@ lpfc_nvme_prep_io_cmd(struct lpfc_vport *vport,
**/
static int
lpfc_nvme_prep_io_dma(struct lpfc_vport *vport,
struct lpfc_nvme_buf *lpfc_ncmd)
struct lpfc_io_buf *lpfc_ncmd)
{
struct lpfc_hba *phba = vport->phba;
struct nvmefc_fcp_req *nCmd = lpfc_ncmd->nvmeCmd;
@@ -1428,7 +1428,7 @@ lpfc_nvme_fcp_io_submit(struct nvme_fc_local_port *pnvme_lport,
struct lpfc_vport *vport;
struct lpfc_hba *phba;
struct lpfc_nodelist *ndlp;
struct lpfc_nvme_buf *lpfc_ncmd;
struct lpfc_io_buf *lpfc_ncmd;
struct lpfc_nvme_rport *rport;
struct lpfc_nvme_qhandle *lpfc_queue_info;
struct lpfc_nvme_fcpreq_priv *freqpriv;
@@ -1616,6 +1616,9 @@ lpfc_nvme_fcp_io_submit(struct nvme_fc_local_port *pnvme_lport,
goto out_free_nvme_buf;
}
if (phba->cfg_xri_rebalancing)
lpfc_keep_pvt_pool_above_lowwm(phba, lpfc_ncmd->hdwq_no);
#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
if (lpfc_ncmd->ts_cmd_start)
lpfc_ncmd->ts_cmd_wqput = ktime_get_ns();
@@ -1704,7 +1707,7 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport,
struct lpfc_nvme_lport *lport;
struct lpfc_vport *vport;
struct lpfc_hba *phba;
struct lpfc_nvme_buf *lpfc_nbuf;
struct lpfc_io_buf *lpfc_nbuf;
struct lpfc_iocbq *abts_buf;
struct lpfc_iocbq *nvmereq_wqe;
struct lpfc_nvme_fcpreq_priv *freqpriv;
@@ -1911,22 +1914,6 @@ static struct nvme_fc_port_template lpfc_nvme_template = {
.fcprqst_priv_sz = sizeof(struct lpfc_nvme_fcpreq_priv),
};
static inline struct lpfc_nvme_buf *
lpfc_nvme_buf(struct lpfc_hba *phba, int idx)
{
struct lpfc_sli4_hdw_queue *qp;
struct lpfc_nvme_buf *lpfc_ncmd, *lpfc_ncmd_next;
qp = &phba->sli4_hba.hdwq[idx];
list_for_each_entry_safe(lpfc_ncmd, lpfc_ncmd_next,
&qp->lpfc_io_buf_list_get, list) {
list_del_init(&lpfc_ncmd->list);
qp->get_io_bufs--;
return lpfc_ncmd;
}
return NULL;
}
/**
* lpfc_get_nvme_buf - Get a nvme buffer from io_buf_list of the HBA
* @phba: The HBA for which this call is being executed.
@@ -1938,34 +1925,17 @@ lpfc_nvme_buf(struct lpfc_hba *phba, int idx)
* NULL - Error
* Pointer to lpfc_nvme_buf - Success
**/
static struct lpfc_nvme_buf *
static struct lpfc_io_buf *
lpfc_get_nvme_buf(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp,
int idx, int expedite)
{
struct lpfc_nvme_buf *lpfc_ncmd = NULL;
struct lpfc_io_buf *lpfc_ncmd;
struct lpfc_sli4_hdw_queue *qp;
struct sli4_sge *sgl;
struct lpfc_iocbq *pwqeq;
union lpfc_wqe128 *wqe;
unsigned long iflag = 0;
qp = &phba->sli4_hba.hdwq[idx];
spin_lock_irqsave(&qp->io_buf_list_get_lock, iflag);
if (qp->get_io_bufs > LPFC_NVME_EXPEDITE_XRICNT || expedite)
lpfc_ncmd = lpfc_nvme_buf(phba, idx);
if (!lpfc_ncmd) {
spin_lock(&qp->io_buf_list_put_lock);
list_splice(&qp->lpfc_io_buf_list_put,
&qp->lpfc_io_buf_list_get);
qp->get_io_bufs += qp->put_io_bufs;
INIT_LIST_HEAD(&qp->lpfc_io_buf_list_put);
qp->put_io_bufs = 0;
spin_unlock(&qp->io_buf_list_put_lock);
if (qp->get_io_bufs > LPFC_NVME_EXPEDITE_XRICNT ||
expedite)
lpfc_ncmd = lpfc_nvme_buf(phba, idx);
}
spin_unlock_irqrestore(&qp->io_buf_list_get_lock, iflag);
lpfc_ncmd = lpfc_get_io_buf(phba, NULL, idx, expedite);
if (lpfc_ncmd) {
pwqeq = &(lpfc_ncmd->cur_iocbq);
......@@ -1978,8 +1948,6 @@ lpfc_get_nvme_buf(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp,
pwqeq->wqe_cmpl = lpfc_nvme_io_cmd_wqe_cmpl;
lpfc_ncmd->start_time = jiffies;
lpfc_ncmd->flags = 0;
lpfc_ncmd->hdwq = qp;
lpfc_ncmd->hdwq_no = idx;
/* Rsp SGE will be filled in when we rcv an IO
* from the NVME Layer to be sent.
@@ -1996,11 +1964,13 @@ lpfc_get_nvme_buf(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp,
if (lpfc_ndlp_check_qdepth(phba, ndlp)) {
atomic_inc(&ndlp->cmd_pending);
lpfc_ncmd->flags |= LPFC_BUMP_QDEPTH;
lpfc_ncmd->flags |= LPFC_SBUF_BUMP_QDEPTH;
}
} else
} else {
qp = &phba->sli4_hba.hdwq[idx];
qp->empty_io_bufs++;
}
return lpfc_ncmd;
}
@@ -2016,16 +1986,16 @@ lpfc_get_nvme_buf(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp,
* aborted.
**/
static void
lpfc_release_nvme_buf(struct lpfc_hba *phba, struct lpfc_nvme_buf *lpfc_ncmd)
lpfc_release_nvme_buf(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_ncmd)
{
struct lpfc_sli4_hdw_queue *qp;
unsigned long iflag = 0;
if ((lpfc_ncmd->flags & LPFC_BUMP_QDEPTH) && lpfc_ncmd->ndlp)
if ((lpfc_ncmd->flags & LPFC_SBUF_BUMP_QDEPTH) && lpfc_ncmd->ndlp)
atomic_dec(&lpfc_ncmd->ndlp->cmd_pending);
lpfc_ncmd->ndlp = NULL;
lpfc_ncmd->flags &= ~LPFC_BUMP_QDEPTH;
lpfc_ncmd->flags &= ~LPFC_SBUF_BUMP_QDEPTH;
qp = lpfc_ncmd->hdwq;
if (lpfc_ncmd->flags & LPFC_SBUF_XBUSY) {
@@ -2040,17 +2010,8 @@ lpfc_release_nvme_buf(struct lpfc_hba *phba, struct lpfc_nvme_buf *lpfc_ncmd)
&qp->lpfc_abts_nvme_buf_list);
qp->abts_nvme_io_bufs++;
spin_unlock_irqrestore(&qp->abts_nvme_buf_list_lock, iflag);
} else {
/* MUST zero fields if buffer is reused by another protocol */
lpfc_ncmd->nvmeCmd = NULL;
lpfc_ncmd->cur_iocbq.wqe_cmpl = NULL;
spin_lock_irqsave(&qp->io_buf_list_put_lock, iflag);
list_add_tail(&lpfc_ncmd->list,
&qp->lpfc_io_buf_list_put);
qp->put_io_bufs++;
spin_unlock_irqrestore(&qp->io_buf_list_put_lock, iflag);
}
} else
lpfc_release_io_buf(phba, (struct lpfc_io_buf *)lpfc_ncmd, qp);
}
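Note that with this rework both the SCSI and NVME release paths funnel into
the common lpfc_release_io_buf(), so the expedite-pool replenish and
private-pool watermark handling described in the commit text apply
regardless of protocol.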
/**
......@@ -2510,7 +2471,7 @@ lpfc_sli4_nvme_xri_aborted(struct lpfc_hba *phba,
struct sli4_wcqe_xri_aborted *axri, int idx)
{
uint16_t xri = bf_get(lpfc_wcqe_xa_xri, axri);
struct lpfc_nvme_buf *lpfc_ncmd, *next_lpfc_ncmd;
struct lpfc_io_buf *lpfc_ncmd, *next_lpfc_ncmd;
struct nvmefc_fcp_req *nvme_cmd = NULL;
struct lpfc_nodelist *ndlp;
struct lpfc_sli4_hdw_queue *qp;
......
@@ -71,49 +71,6 @@ struct lpfc_nvme_rport {
struct completion rport_unreg_done;
};
struct lpfc_nvme_buf {
/* Common fields */
struct list_head list;
void *data;
dma_addr_t dma_handle;
dma_addr_t dma_phys_sgl;
struct sli4_sge *dma_sgl;
struct lpfc_iocbq cur_iocbq;
struct lpfc_sli4_hdw_queue *hdwq;
uint16_t hdwq_no;
uint16_t cpu;
/* NVME specific fields */
struct nvmefc_fcp_req *nvmeCmd;
struct lpfc_nodelist *ndlp;
uint32_t timeout;
uint16_t flags; /* TBD convert exch_busy to flags */
#define LPFC_SBUF_XBUSY 0x1 /* SLI4 hba reported XB on WCQE cmpl */
#define LPFC_BUMP_QDEPTH 0x2 /* bumped queue depth counter */
uint16_t exch_busy; /* SLI4 hba reported XB on complete WCQE */
uint16_t status; /* From IOCB Word 7- ulpStatus */
uint32_t result; /* From IOCB Word 4. */
uint32_t seg_cnt; /* Number of scatter-gather segments returned by
* dma_map_sg. The driver needs this for calls
* to dma_unmap_sg.
*/
wait_queue_head_t *waitq;
unsigned long start_time;
uint16_t qidx;
#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
uint64_t ts_cmd_start;
uint64_t ts_last_cmd;
uint64_t ts_cmd_wqput;
uint64_t ts_isr_cmpl;
uint64_t ts_data_nvme;
#endif
};
struct lpfc_nvme_fcpreq_priv {
struct lpfc_nvme_buf *nvme_buf;
struct lpfc_io_buf *nvme_buf;
};
[diff for this file collapsed]
@@ -130,59 +130,6 @@ struct lpfc_scsicmd_bkt {
uint32_t cmd_count;
};
struct lpfc_scsi_buf {
/* Common fields */
struct list_head list;
void *data;
dma_addr_t dma_handle;
dma_addr_t dma_phys_sgl;
struct ulp_bde64 *dma_sgl;
struct lpfc_iocbq cur_iocbq;
struct lpfc_sli4_hdw_queue *hdwq;
uint16_t hdwq_no;
uint16_t cpu;
/* SCSI specific fields */
struct scsi_cmnd *pCmd;
struct lpfc_rport_data *rdata;
struct lpfc_nodelist *ndlp;
uint32_t timeout;
uint16_t flags; /* TBD convert exch_busy to flags */
#define LPFC_SBUF_XBUSY 0x1 /* SLI4 hba reported XB on WCQE cmpl */
#define LPFC_SBUF_BUMP_QDEPTH 0x2 /* bumped queue depth counter */
uint16_t exch_busy; /* SLI4 hba reported XB on complete WCQE */
uint16_t status; /* From IOCB Word 7- ulpStatus */
uint32_t result; /* From IOCB Word 4. */
uint32_t seg_cnt; /* Number of scatter-gather segments returned by
* dma_map_sg. The driver needs this for calls
* to dma_unmap_sg. */
uint32_t prot_seg_cnt; /* seg_cnt's counterpart for protection data */
/*
* data and dma_handle are the kernel virtual and bus address of the
* dma-able buffer containing the fcp_cmd, fcp_rsp and a scatter
* gather bde list that supports the sg_tablesize value.
*/
struct fcp_cmnd *fcp_cmnd;
struct fcp_rsp *fcp_rsp;
wait_queue_head_t *waitq;
unsigned long start_time;
#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
/* Used to restore any changes to protection data for error injection */
void *prot_data_segment;
uint32_t prot_data;
uint32_t prot_data_type;
#define LPFC_INJERR_REFTAG 1
#define LPFC_INJERR_APPTAG 2
#define LPFC_INJERR_GUARD 3
#endif
};
#define LPFC_SCSI_DMA_EXT_SIZE 264
#define LPFC_BPL_SIZE 1024
#define MDAC_DIRECT_CMD 0x22
......
[diff for this file collapsed]
@@ -20,6 +20,10 @@
* included with this package. *
*******************************************************************/
#if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_SCSI_LPFC_DEBUG_FS)
#define CONFIG_SCSI_LPFC_DEBUG_FS
#endif
/* forward declaration for LPFC_IOCB_t's use */
struct lpfc_hba;
struct lpfc_vport;
@@ -352,3 +356,84 @@ struct lpfc_sli {
#define LPFC_MBOX_SLI4_CONFIG_EXTENDED_TMO 300
/* Timeout for other flash-based outstanding mbox command (Seconds) */
#define LPFC_MBOX_TMO_FLASH_CMD 300
struct lpfc_io_buf {
/* Common fields */
struct list_head list;
void *data;
dma_addr_t dma_handle;
dma_addr_t dma_phys_sgl;
struct sli4_sge *dma_sgl;
struct lpfc_iocbq cur_iocbq;
struct lpfc_sli4_hdw_queue *hdwq;
uint16_t hdwq_no;
uint16_t cpu;
struct lpfc_nodelist *ndlp;
uint32_t timeout;
uint16_t flags; /* TBD convert exch_busy to flags */
#define LPFC_SBUF_XBUSY 0x1 /* SLI4 hba reported XB on WCQE cmpl */
#define LPFC_SBUF_BUMP_QDEPTH 0x2 /* bumped queue depth counter */
/* External DIF device IO conversions */
#define LPFC_SBUF_NORMAL_DIF 0x4 /* normal mode to insert/strip */
#define LPFC_SBUF_PASS_DIF 0x8 /* insert/strip mode to passthru */
#define LPFC_SBUF_NOT_POSTED 0x10 /* SGL failed post to FW. */
uint16_t exch_busy; /* SLI4 hba reported XB on complete WCQE */
uint16_t status; /* From IOCB Word 7- ulpStatus */
uint32_t result; /* From IOCB Word 4. */
uint32_t seg_cnt; /* Number of scatter-gather segments returned by
* dma_map_sg. The driver needs this for calls
* to dma_unmap_sg.
*/
unsigned long start_time;
bool expedite; /* this is an expedite io_buf */
union {
/* SCSI specific fields */
struct {
struct scsi_cmnd *pCmd;
struct lpfc_rport_data *rdata;
uint32_t prot_seg_cnt; /* seg_cnt's counterpart for
* protection data
*/
/*
* data and dma_handle are the kernel virtual and bus
* address of the dma-able buffer containing the
* fcp_cmd, fcp_rsp and a scatter gather bde list that
* supports the sg_tablesize value.
*/
struct fcp_cmnd *fcp_cmnd;
struct fcp_rsp *fcp_rsp;
wait_queue_head_t *waitq;
#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
/* Used to restore any changes to protection data for
* error injection
*/
void *prot_data_segment;
uint32_t prot_data;
uint32_t prot_data_type;
#define LPFC_INJERR_REFTAG 1
#define LPFC_INJERR_APPTAG 2
#define LPFC_INJERR_GUARD 3
#endif
};
/* NVME specific fields */
struct {
struct nvmefc_fcp_req *nvmeCmd;
uint16_t qidx;
#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
uint64_t ts_cmd_start;
uint64_t ts_last_cmd;
uint64_t ts_cmd_wqput;
uint64_t ts_isr_cmpl;
uint64_t ts_data_nvme;
#endif
};
};
};
@@ -533,6 +533,59 @@ struct lpfc_vector_map_info {
};
#define LPFC_VECTOR_MAP_EMPTY 0xffff
/* Multi-XRI pool */
#define XRI_BATCH 8
struct lpfc_pbl_pool {
struct list_head list;
u32 count;
spinlock_t lock; /* lock for pbl_pool*/
};
struct lpfc_pvt_pool {
u32 low_watermark;
u32 high_watermark;
struct list_head list;
u32 count;
spinlock_t lock; /* lock for pvt_pool */
};
struct lpfc_multixri_pool {
u32 xri_limit;
/* Starting point when searching a pbl_pool with round-robin method */
u32 rrb_next_hwqid;
/* Used by lpfc_adjust_pvt_pool_count.
* io_req_count is incremented by 1 during IO submission. The heartbeat
* handler uses these two variables to determine if pvt_pool is idle or
* busy.
*/
u32 prev_io_req_count;
u32 io_req_count;
/* statistics */
u32 pbl_empty_count;
#ifdef LPFC_MXP_STAT
u32 above_limit_count;
u32 below_limit_count;
u32 local_pbl_hit_count;
u32 other_pbl_hit_count;
u32 stat_max_hwm;
#define LPFC_MXP_SNAPSHOT_TAKEN 3 /* snapshot is taken at the 3rd heartbeat */
u32 stat_pbl_count;
u32 stat_pvt_count;
u32 stat_busy_count;
u32 stat_snapshot_taken;
#endif
/* TODO: Separate pvt_pool into get and put list */
struct lpfc_pbl_pool pbl_pool; /* Public free XRI pool */
struct lpfc_pvt_pool pvt_pool; /* Private free XRI pool */
};
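For orientation, one plausible shape of the limit this struct tracks (an
assumption for illustration only; the actual sizing happens when
lpfc_create_multixri_pools() carves up the adapter's io XRIs):

```c
#include <stdint.h>

/* Illustrative only: an even split of the adapter's io XRIs across the
 * hardware queues. xri_owned measured against xri_limit is what drives
 * above_limit_count and below_limit_count. */
static uint32_t example_xri_limit(uint32_t total_io_xri, uint32_t hwq_count)
{
	return total_io_xri / hwq_count;
}
```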
struct lpfc_fc4_ctrl_stat {
u32 input_requests;
u32 output_requests;
@@ -567,6 +620,9 @@ struct lpfc_sli4_hdw_queue {
uint32_t abts_scsi_io_bufs;
uint32_t abts_nvme_io_bufs;
/* Multi-XRI pool per HWQ */
struct lpfc_multixri_pool *p_multixri_pool;
/* FC-4 Stats counters */
struct lpfc_fc4_ctrl_stat nvme_cstat;
struct lpfc_fc4_ctrl_stat scsi_cstat;
......