Commit d79c9e9d authored by James Smart, committed by Martin K. Petersen

scsi: lpfc: Support dynamic unbounded SGL lists on G7 hardware.

Typical SLI-4 hardware supports up to 2 4KB pages to be registered per XRI
to contain the exchange's Scatter/Gather List. This caps the number of SGL
elements that can be in the SGL. There are no extensions to extend the
list beyond the 2 pages.

The G7 hardware adds an SGE type that allows the SGL to be vectored to a
different scatter/gather list segment. That segment can in turn contain an
SGE that points to another segment, and so on.  The initial segment must
still be pre-registered for the XRI, but it can be a much smaller amount
(256 bytes) as it can now be dynamically grown.  This much smaller
allocation can handle the SG list for most normal I/O, and the dynamic
aspect allows it to support many MBs if needed.

The implementation creates a pool which contains "segments" and which is
initially sized to hold the initial small segment per xri. If an I/O
requires additional segments, they are allocated from the pool.  If the
pool has no more segments, the pool is grown based on what is now
needed. After the I/O completes, the additional segments are returned to
the pool for use by other I/Os. Once allocated, the additional segments are
not released under the assumption of "if needed once, it will be needed
again". Pools are kept on a per-hardware queue basis, which is typically
1:1 per cpu, but may be shared by multiple cpus.

The switch to the smaller initial allocation significantly reduces the
memory footprint of the driver (which only grows if large I/Os are
issued). Based on the several K of XRIs for the adapter, the 8KB->256B
reduction can conserve 32MB or more.

It has been observed with per-cpu resource pools that a resource allocated
on CPU A may be put back on CPU B. While the get routines are distributed
evenly, only a limited subset of CPUs may be handling the put routines.
This can put a strain on the lpfc_put_cmd_rsp_buf_per_cpu routine because
all the resources are being put on a limited subset of CPUs.
Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <jsmart2021@gmail.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
parent e62245d9
...@@ -51,6 +51,8 @@ struct lpfc_sli2_slim; ...@@ -51,6 +51,8 @@ struct lpfc_sli2_slim;
cmnd for menlo needs nearly twice as for firmware cmnd for menlo needs nearly twice as for firmware
downloads using bsg */ downloads using bsg */
/*
 * NOTE(review): presumably the size in bytes of the small initial SGL
 * segment pre-registered per XRI when extended (dynamically grown) SGLs
 * are in use -- confirm against the code that consumes it.
 */
#define LPFC_DEFAULT_XPSGL_SIZE 256
/* NOTE(review): looks like an upper bound on the SG table size -- confirm */
#define LPFC_MAX_SG_TABLESIZE 0xffff
#define LPFC_MIN_SG_SLI4_BUF_SZ 0x800 /* based on LPFC_DEFAULT_SG_SEG_CNT */ #define LPFC_MIN_SG_SLI4_BUF_SZ 0x800 /* based on LPFC_DEFAULT_SG_SEG_CNT */
#define LPFC_MAX_BG_SLI4_SEG_CNT_DIF 128 /* sg element count for BlockGuard */ #define LPFC_MAX_BG_SLI4_SEG_CNT_DIF 128 /* sg element count for BlockGuard */
#define LPFC_MAX_SG_SEG_CNT_DIF 512 /* sg element count per scsi cmnd */ #define LPFC_MAX_SG_SEG_CNT_DIF 512 /* sg element count per scsi cmnd */
...@@ -799,6 +801,7 @@ struct lpfc_hba { ...@@ -799,6 +801,7 @@ struct lpfc_hba {
/* HBA Config Parameters */ /* HBA Config Parameters */
uint32_t cfg_ack0; uint32_t cfg_ack0;
uint32_t cfg_xri_rebalancing; uint32_t cfg_xri_rebalancing;
uint32_t cfg_xpsgl;
uint32_t cfg_enable_npiv; uint32_t cfg_enable_npiv;
uint32_t cfg_enable_rrq; uint32_t cfg_enable_rrq;
uint32_t cfg_topology; uint32_t cfg_topology;
...@@ -904,6 +907,7 @@ struct lpfc_hba { ...@@ -904,6 +907,7 @@ struct lpfc_hba {
wait_queue_head_t work_waitq; wait_queue_head_t work_waitq;
struct task_struct *worker_thread; struct task_struct *worker_thread;
unsigned long data_flags; unsigned long data_flags;
uint32_t border_sge_num;
uint32_t hbq_in_use; /* HBQs in use flag */ uint32_t hbq_in_use; /* HBQs in use flag */
uint32_t hbq_count; /* Count of configured HBQs */ uint32_t hbq_count; /* Count of configured HBQs */
...@@ -986,6 +990,7 @@ struct lpfc_hba { ...@@ -986,6 +990,7 @@ struct lpfc_hba {
struct dma_pool *lpfc_nvmet_drb_pool; /* data receive buffer pool */ struct dma_pool *lpfc_nvmet_drb_pool; /* data receive buffer pool */
struct dma_pool *lpfc_hbq_pool; /* SLI3 hbq buffer pool */ struct dma_pool *lpfc_hbq_pool; /* SLI3 hbq buffer pool */
struct dma_pool *txrdy_payload_pool; struct dma_pool *txrdy_payload_pool;
struct dma_pool *lpfc_cmd_rsp_buf_pool;
struct lpfc_dma_pool lpfc_mbuf_safety_pool; struct lpfc_dma_pool lpfc_mbuf_safety_pool;
mempool_t *mbox_mem_pool; mempool_t *mbox_mem_pool;
......
...@@ -2050,6 +2050,23 @@ struct sli4_sge { /* SLI-4 */ ...@@ -2050,6 +2050,23 @@ struct sli4_sge { /* SLI-4 */
uint32_t sge_len; uint32_t sge_len;
}; };
/*
 * One additional SGL segment ("chunk"). Chunks are kept on a hardware
 * queue's sgl_list while idle and moved onto an I/O buffer's
 * dma_sgl_xtra_list while in use.
 */
struct sli4_hybrid_sgl {
/* links the chunk on either the hdwq pool list or an I/O buffer's list */
struct list_head list_node;
/* virtual address of the segment, from dma_pool_alloc() */
struct sli4_sge *dma_sgl;
/* DMA address of the segment returned by the same dma_pool_alloc() */
dma_addr_t dma_phys_sgl;
};
/*
 * One FCP command/response buffer. Buffers are kept on a hardware
 * queue's cmd_rsp_buf_list while idle and moved onto an I/O buffer's
 * dma_cmd_rsp_list while in use.
 */
struct fcp_cmd_rsp_buf {
/* links the buffer on either the hdwq pool list or an I/O buffer's list */
struct list_head list_node;
/* for storing cmd/rsp dma alloc'ed virt_addr */
/* fcp_cmnd is the start of the DMA pool allocation */
struct fcp_cmnd *fcp_cmnd;
/* fcp_rsp points sizeof(struct fcp_cmnd) bytes past fcp_cmnd */
struct fcp_rsp *fcp_rsp;
/* for storing this cmd/rsp's dma mapped phys addr from per CPU pool */
dma_addr_t fcp_cmd_rsp_dma_handle;
};
struct sli4_sge_diseed { /* SLI-4 */ struct sli4_sge_diseed { /* SLI-4 */
uint32_t ref_tag; uint32_t ref_tag;
uint32_t ref_tag_tran; uint32_t ref_tag_tran;
...@@ -3449,6 +3466,9 @@ struct lpfc_sli4_parameters { ...@@ -3449,6 +3466,9 @@ struct lpfc_sli4_parameters {
#define cfg_xib_SHIFT 4 #define cfg_xib_SHIFT 4
#define cfg_xib_MASK 0x00000001 #define cfg_xib_MASK 0x00000001
#define cfg_xib_WORD word19 #define cfg_xib_WORD word19
/* cfg_xpsgl: single-bit field at bit 6 of word19 */
#define cfg_xpsgl_SHIFT 6
#define cfg_xpsgl_MASK 0x00000001
#define cfg_xpsgl_WORD word19
#define cfg_eqdr_SHIFT 8 #define cfg_eqdr_SHIFT 8
#define cfg_eqdr_MASK 0x00000001 #define cfg_eqdr_MASK 0x00000001
#define cfg_eqdr_WORD word19 #define cfg_eqdr_WORD word19
......
This diff is collapsed.
...@@ -72,8 +72,8 @@ lpfc_mem_alloc_active_rrq_pool_s4(struct lpfc_hba *phba) { ...@@ -72,8 +72,8 @@ lpfc_mem_alloc_active_rrq_pool_s4(struct lpfc_hba *phba) {
* lpfc_mem_alloc - create and allocate all PCI and memory pools * lpfc_mem_alloc - create and allocate all PCI and memory pools
* @phba: HBA to allocate pools for * @phba: HBA to allocate pools for
* *
* Description: Creates and allocates PCI pools lpfc_sg_dma_buf_pool, * Description: Creates and allocates PCI pools lpfc_mbuf_pool,
* lpfc_mbuf_pool, lpfc_hrb_pool. Creates and allocates kmalloc-backed mempools * lpfc_hrb_pool. Creates and allocates kmalloc-backed mempools
* for LPFC_MBOXQ_t and lpfc_nodelist. Also allocates the VPI bitmask. * for LPFC_MBOXQ_t and lpfc_nodelist. Also allocates the VPI bitmask.
* *
* Notes: Not interrupt-safe. Must be called with no locks held. If any * Notes: Not interrupt-safe. Must be called with no locks held. If any
...@@ -89,36 +89,12 @@ lpfc_mem_alloc(struct lpfc_hba *phba, int align) ...@@ -89,36 +89,12 @@ lpfc_mem_alloc(struct lpfc_hba *phba, int align)
struct lpfc_dma_pool *pool = &phba->lpfc_mbuf_safety_pool; struct lpfc_dma_pool *pool = &phba->lpfc_mbuf_safety_pool;
int i; int i;
if (phba->sli_rev == LPFC_SLI_REV4) {
/* Calculate alignment */
if (phba->cfg_sg_dma_buf_size < SLI4_PAGE_SIZE)
i = phba->cfg_sg_dma_buf_size;
else
i = SLI4_PAGE_SIZE;
phba->lpfc_sg_dma_buf_pool =
dma_pool_create("lpfc_sg_dma_buf_pool",
&phba->pcidev->dev,
phba->cfg_sg_dma_buf_size,
i, 0);
if (!phba->lpfc_sg_dma_buf_pool)
goto fail;
} else {
phba->lpfc_sg_dma_buf_pool =
dma_pool_create("lpfc_sg_dma_buf_pool",
&phba->pcidev->dev, phba->cfg_sg_dma_buf_size,
align, 0);
if (!phba->lpfc_sg_dma_buf_pool)
goto fail;
}
phba->lpfc_mbuf_pool = dma_pool_create("lpfc_mbuf_pool", &phba->pcidev->dev, phba->lpfc_mbuf_pool = dma_pool_create("lpfc_mbuf_pool", &phba->pcidev->dev,
LPFC_BPL_SIZE, LPFC_BPL_SIZE,
align, 0); align, 0);
if (!phba->lpfc_mbuf_pool) if (!phba->lpfc_mbuf_pool)
goto fail_free_dma_buf_pool; goto fail;
pool->elements = kmalloc_array(LPFC_MBUF_POOL_SIZE, pool->elements = kmalloc_array(LPFC_MBUF_POOL_SIZE,
sizeof(struct lpfc_dmabuf), sizeof(struct lpfc_dmabuf),
...@@ -208,9 +184,6 @@ lpfc_mem_alloc(struct lpfc_hba *phba, int align) ...@@ -208,9 +184,6 @@ lpfc_mem_alloc(struct lpfc_hba *phba, int align)
fail_free_lpfc_mbuf_pool: fail_free_lpfc_mbuf_pool:
dma_pool_destroy(phba->lpfc_mbuf_pool); dma_pool_destroy(phba->lpfc_mbuf_pool);
phba->lpfc_mbuf_pool = NULL; phba->lpfc_mbuf_pool = NULL;
fail_free_dma_buf_pool:
dma_pool_destroy(phba->lpfc_sg_dma_buf_pool);
phba->lpfc_sg_dma_buf_pool = NULL;
fail: fail:
return -ENOMEM; return -ENOMEM;
} }
...@@ -287,10 +260,6 @@ lpfc_mem_free(struct lpfc_hba *phba) ...@@ -287,10 +260,6 @@ lpfc_mem_free(struct lpfc_hba *phba)
dma_pool_destroy(phba->lpfc_mbuf_pool); dma_pool_destroy(phba->lpfc_mbuf_pool);
phba->lpfc_mbuf_pool = NULL; phba->lpfc_mbuf_pool = NULL;
/* Free DMA buffer memory pool */
dma_pool_destroy(phba->lpfc_sg_dma_buf_pool);
phba->lpfc_sg_dma_buf_pool = NULL;
/* Free Device Data memory pool */ /* Free Device Data memory pool */
if (phba->device_data_mem_pool) { if (phba->device_data_mem_pool) {
/* Ensure all objects have been returned to the pool */ /* Ensure all objects have been returned to the pool */
...@@ -363,6 +332,13 @@ lpfc_mem_free_all(struct lpfc_hba *phba) ...@@ -363,6 +332,13 @@ lpfc_mem_free_all(struct lpfc_hba *phba)
/* Free and destroy all the allocated memory pools */ /* Free and destroy all the allocated memory pools */
lpfc_mem_free(phba); lpfc_mem_free(phba);
/* Free DMA buffer memory pool */
dma_pool_destroy(phba->lpfc_sg_dma_buf_pool);
phba->lpfc_sg_dma_buf_pool = NULL;
dma_pool_destroy(phba->lpfc_cmd_rsp_buf_pool);
phba->lpfc_cmd_rsp_buf_pool = NULL;
/* Free the iocb lookup array */ /* Free the iocb lookup array */
kfree(psli->iocbq_lookup); kfree(psli->iocbq_lookup);
psli->iocbq_lookup = NULL; psli->iocbq_lookup = NULL;
......
...@@ -1306,14 +1306,16 @@ lpfc_nvme_prep_io_dma(struct lpfc_vport *vport, ...@@ -1306,14 +1306,16 @@ lpfc_nvme_prep_io_dma(struct lpfc_vport *vport,
struct nvmefc_fcp_req *nCmd = lpfc_ncmd->nvmeCmd; struct nvmefc_fcp_req *nCmd = lpfc_ncmd->nvmeCmd;
union lpfc_wqe128 *wqe = &lpfc_ncmd->cur_iocbq.wqe; union lpfc_wqe128 *wqe = &lpfc_ncmd->cur_iocbq.wqe;
struct sli4_sge *sgl = lpfc_ncmd->dma_sgl; struct sli4_sge *sgl = lpfc_ncmd->dma_sgl;
struct sli4_hybrid_sgl *sgl_xtra = NULL;
struct scatterlist *data_sg; struct scatterlist *data_sg;
struct sli4_sge *first_data_sgl; struct sli4_sge *first_data_sgl;
struct ulp_bde64 *bde; struct ulp_bde64 *bde;
dma_addr_t physaddr; dma_addr_t physaddr = 0;
uint32_t num_bde = 0; uint32_t num_bde = 0;
uint32_t dma_len; uint32_t dma_len = 0;
uint32_t dma_offset = 0; uint32_t dma_offset = 0;
int nseg, i; int nseg, i, j;
bool lsp_just_set = false;
/* Fix up the command and response DMA stuff. */ /* Fix up the command and response DMA stuff. */
lpfc_nvme_adj_fcp_sgls(vport, lpfc_ncmd, nCmd); lpfc_nvme_adj_fcp_sgls(vport, lpfc_ncmd, nCmd);
...@@ -1350,6 +1352,9 @@ lpfc_nvme_prep_io_dma(struct lpfc_vport *vport, ...@@ -1350,6 +1352,9 @@ lpfc_nvme_prep_io_dma(struct lpfc_vport *vport,
*/ */
nseg = nCmd->sg_cnt; nseg = nCmd->sg_cnt;
data_sg = nCmd->first_sgl; data_sg = nCmd->first_sgl;
/* for tracking the segment boundaries */
j = 2;
for (i = 0; i < nseg; i++) { for (i = 0; i < nseg; i++) {
if (data_sg == NULL) { if (data_sg == NULL) {
lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
...@@ -1358,23 +1363,76 @@ lpfc_nvme_prep_io_dma(struct lpfc_vport *vport, ...@@ -1358,23 +1363,76 @@ lpfc_nvme_prep_io_dma(struct lpfc_vport *vport,
lpfc_ncmd->seg_cnt = 0; lpfc_ncmd->seg_cnt = 0;
return 1; return 1;
} }
physaddr = data_sg->dma_address;
dma_len = data_sg->length; sgl->word2 = 0;
sgl->addr_lo = cpu_to_le32(putPaddrLow(physaddr)); if ((num_bde + 1) == nseg) {
sgl->addr_hi = cpu_to_le32(putPaddrHigh(physaddr));
sgl->word2 = le32_to_cpu(sgl->word2);
if ((num_bde + 1) == nseg)
bf_set(lpfc_sli4_sge_last, sgl, 1); bf_set(lpfc_sli4_sge_last, sgl, 1);
else bf_set(lpfc_sli4_sge_type, sgl,
LPFC_SGE_TYPE_DATA);
} else {
bf_set(lpfc_sli4_sge_last, sgl, 0); bf_set(lpfc_sli4_sge_last, sgl, 0);
bf_set(lpfc_sli4_sge_offset, sgl, dma_offset);
bf_set(lpfc_sli4_sge_type, sgl, LPFC_SGE_TYPE_DATA); /* expand the segment */
sgl->word2 = cpu_to_le32(sgl->word2); if (!lsp_just_set &&
sgl->sge_len = cpu_to_le32(dma_len); !((j + 1) % phba->border_sge_num) &&
((nseg - 1) != i)) {
dma_offset += dma_len; /* set LSP type */
data_sg = sg_next(data_sg); bf_set(lpfc_sli4_sge_type, sgl,
sgl++; LPFC_SGE_TYPE_LSP);
sgl_xtra = lpfc_get_sgl_per_hdwq(
phba, lpfc_ncmd);
if (unlikely(!sgl_xtra)) {
lpfc_ncmd->seg_cnt = 0;
return 1;
}
sgl->addr_lo = cpu_to_le32(putPaddrLow(
sgl_xtra->dma_phys_sgl));
sgl->addr_hi = cpu_to_le32(putPaddrHigh(
sgl_xtra->dma_phys_sgl));
} else {
bf_set(lpfc_sli4_sge_type, sgl,
LPFC_SGE_TYPE_DATA);
}
}
if (!(bf_get(lpfc_sli4_sge_type, sgl) &
LPFC_SGE_TYPE_LSP)) {
if ((nseg - 1) == i)
bf_set(lpfc_sli4_sge_last, sgl, 1);
physaddr = data_sg->dma_address;
dma_len = data_sg->length;
sgl->addr_lo = cpu_to_le32(
putPaddrLow(physaddr));
sgl->addr_hi = cpu_to_le32(
putPaddrHigh(physaddr));
bf_set(lpfc_sli4_sge_offset, sgl, dma_offset);
sgl->word2 = cpu_to_le32(sgl->word2);
sgl->sge_len = cpu_to_le32(dma_len);
dma_offset += dma_len;
data_sg = sg_next(data_sg);
sgl++;
lsp_just_set = false;
} else {
sgl->word2 = cpu_to_le32(sgl->word2);
sgl->sge_len = cpu_to_le32(
phba->cfg_sg_dma_buf_size);
sgl = (struct sli4_sge *)sgl_xtra->dma_sgl;
i = i - 1;
lsp_just_set = true;
}
j++;
} }
if (phba->cfg_enable_pbde) { if (phba->cfg_enable_pbde) {
/* Use PBDE support for first SGL only, offset == 0 */ /* Use PBDE support for first SGL only, offset == 0 */
......
This diff is collapsed.
...@@ -20233,6 +20233,13 @@ void lpfc_release_io_buf(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_ncmd, ...@@ -20233,6 +20233,13 @@ void lpfc_release_io_buf(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_ncmd,
spin_unlock_irqrestore(&qp->io_buf_list_put_lock, spin_unlock_irqrestore(&qp->io_buf_list_put_lock,
iflag); iflag);
} }
if (phba->cfg_xpsgl && !phba->nvmet_support &&
!list_empty(&lpfc_ncmd->dma_sgl_xtra_list))
lpfc_put_sgl_per_hdwq(phba, lpfc_ncmd);
if (!list_empty(&lpfc_ncmd->dma_cmd_rsp_list))
lpfc_put_cmd_rsp_buf_per_hdwq(phba, lpfc_ncmd);
} }
/** /**
...@@ -20447,3 +20454,288 @@ struct lpfc_io_buf *lpfc_get_io_buf(struct lpfc_hba *phba, ...@@ -20447,3 +20454,288 @@ struct lpfc_io_buf *lpfc_get_io_buf(struct lpfc_hba *phba,
return lpfc_cmd; return lpfc_cmd;
} }
/**
 * lpfc_get_sgl_per_hdwq - Get one SGL chunk from hdwq's pool
 * @phba: The HBA for which this call is being executed.
 * @lpfc_buf: IO buf structure to append the SGL chunk
 *
 * This routine moves the first SGL chunk on the hdwq's sgl_list to the
 * tail of @lpfc_buf's dma_sgl_xtra_list. If the pool is empty, a new
 * chunk is allocated with GFP_ATOMIC from lpfc_sg_dma_buf_pool and
 * appended to @lpfc_buf's list instead.
 *
 * The hdwq_lock is taken with irqsave/irqrestore so that the caller's
 * interrupt state is preserved; unlocking never unconditionally
 * re-enables interrupts.
 *
 * Return codes:
 *   NULL - Error (memory allocation failed)
 *   Pointer to sli4_hybrid_sgl - Success
 **/
struct sli4_hybrid_sgl *
lpfc_get_sgl_per_hdwq(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_buf)
{
	struct sli4_hybrid_sgl *tmp = NULL;
	struct sli4_hybrid_sgl *allocated_sgl = NULL;
	struct lpfc_sli4_hdw_queue *hdwq = lpfc_buf->hdwq;
	struct list_head *buf_list = &hdwq->sgl_list;
	unsigned long iflags;

	spin_lock_irqsave(&hdwq->hdwq_lock, iflags);

	if (likely(!list_empty(buf_list))) {
		/* break off 1 chunk from the sgl_list */
		tmp = list_first_entry(buf_list, struct sli4_hybrid_sgl,
				       list_node);
		list_move_tail(&tmp->list_node,
			       &lpfc_buf->dma_sgl_xtra_list);
	} else {
		/* allocate more; drop the lock around the allocations */
		spin_unlock_irqrestore(&hdwq->hdwq_lock, iflags);

		/*
		 * The CPU id is only a NUMA locality hint here, and this
		 * code may run preemptible once the lock is dropped, so
		 * use the raw variant rather than smp_processor_id().
		 */
		tmp = kmalloc_node(sizeof(*tmp), GFP_ATOMIC,
				   cpu_to_node(raw_smp_processor_id()));
		if (!tmp) {
			lpfc_printf_log(phba, KERN_INFO, LOG_SLI,
					"8353 error kmalloc memory for HDWQ "
					"%d %s\n",
					lpfc_buf->hdwq_no, __func__);
			return NULL;
		}

		tmp->dma_sgl = dma_pool_alloc(phba->lpfc_sg_dma_buf_pool,
					      GFP_ATOMIC, &tmp->dma_phys_sgl);
		if (!tmp->dma_sgl) {
			lpfc_printf_log(phba, KERN_INFO, LOG_SLI,
					"8354 error pool_alloc memory for HDWQ "
					"%d %s\n",
					lpfc_buf->hdwq_no, __func__);
			kfree(tmp);
			return NULL;
		}

		spin_lock_irqsave(&hdwq->hdwq_lock, iflags);
		list_add_tail(&tmp->list_node, &lpfc_buf->dma_sgl_xtra_list);
	}

	/* the chunk just moved/added is the last one on the I/O buf's list */
	allocated_sgl = list_last_entry(&lpfc_buf->dma_sgl_xtra_list,
					struct sli4_hybrid_sgl,
					list_node);

	spin_unlock_irqrestore(&hdwq->hdwq_lock, iflags);

	return allocated_sgl;
}
/**
 * lpfc_put_sgl_per_hdwq - Put SGL chunks back into hdwq pool
 * @phba: The HBA for which this call is being executed.
 * @lpfc_buf: IO buf structure with the SGL chunk(s)
 *
 * This routine moves every SGL chunk on @lpfc_buf's dma_sgl_xtra_list,
 * in order, to the tail of the hdwq's sgl_list, leaving the I/O buffer's
 * list empty.
 *
 * The hdwq_lock is taken with irqsave/irqrestore so that the caller's
 * interrupt state is preserved; unlocking never unconditionally
 * re-enables interrupts.
 *
 * Return codes:
 *   0 - Success
 *   -EINVAL - Error (the I/O buffer had no extra SGL chunks)
 **/
int
lpfc_put_sgl_per_hdwq(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_buf)
{
	int rc = 0;
	struct lpfc_sli4_hdw_queue *hdwq = lpfc_buf->hdwq;
	struct list_head *buf_list = &hdwq->sgl_list;
	unsigned long iflags;

	spin_lock_irqsave(&hdwq->hdwq_lock, iflags);

	if (likely(!list_empty(&lpfc_buf->dma_sgl_xtra_list))) {
		/* hand all chunks back to the pool, preserving their order */
		list_splice_tail_init(&lpfc_buf->dma_sgl_xtra_list,
				      buf_list);
	} else {
		rc = -EINVAL;
	}

	spin_unlock_irqrestore(&hdwq->hdwq_lock, iflags);
	return rc;
}
/**
 * lpfc_free_sgl_per_hdwq - Free all SGL chunks of hdwq pool
 * @phba: phba object
 * @hdwq: hdwq to cleanup sgl buff resources on
 *
 * This routine walks the hdwq's sgl_list and, for each chunk, unlinks
 * it, returns its DMA memory to lpfc_sg_dma_buf_pool and frees the
 * tracking structure. Chunks currently attached to an I/O buffer are
 * not on this list and are not touched.
 *
 * The hdwq_lock is taken with irqsave/irqrestore so that the caller's
 * interrupt state is preserved.
 *
 * Return codes:
 *   None
 **/
void
lpfc_free_sgl_per_hdwq(struct lpfc_hba *phba,
		       struct lpfc_sli4_hdw_queue *hdwq)
{
	struct list_head *buf_list = &hdwq->sgl_list;
	struct sli4_hybrid_sgl *list_entry = NULL;
	struct sli4_hybrid_sgl *tmp = NULL;
	unsigned long iflags;

	spin_lock_irqsave(&hdwq->hdwq_lock, iflags);

	/* Free sgl pool */
	list_for_each_entry_safe(list_entry, tmp,
				 buf_list, list_node) {
		/* unlink first so the list never references freed memory */
		list_del(&list_entry->list_node);
		dma_pool_free(phba->lpfc_sg_dma_buf_pool,
			      list_entry->dma_sgl,
			      list_entry->dma_phys_sgl);
		kfree(list_entry);
	}

	spin_unlock_irqrestore(&hdwq->hdwq_lock, iflags);
}
/**
 * lpfc_get_cmd_rsp_buf_per_hdwq - Get one CMD/RSP buffer from hdwq
 * @phba: The HBA for which this call is being executed.
 * @lpfc_buf: IO buf structure to attach the CMD/RSP buffer
 *
 * This routine moves the first CMD/RSP buffer on the hdwq's
 * cmd_rsp_buf_list to the tail of @lpfc_buf's dma_cmd_rsp_list. If the
 * pool is empty, a new buffer is allocated with GFP_ATOMIC from
 * lpfc_cmd_rsp_buf_pool and appended to @lpfc_buf's list instead. In a
 * newly allocated buffer the response area starts
 * sizeof(struct fcp_cmnd) bytes after the command area, within the same
 * DMA allocation.
 *
 * The hdwq_lock is taken with irqsave/irqrestore so that the caller's
 * interrupt state is preserved; unlocking never unconditionally
 * re-enables interrupts.
 *
 * Return codes:
 *   NULL - Error (memory allocation failed)
 *   Pointer to fcp_cmd_rsp_buf - Success
 **/
struct fcp_cmd_rsp_buf *
lpfc_get_cmd_rsp_buf_per_hdwq(struct lpfc_hba *phba,
			      struct lpfc_io_buf *lpfc_buf)
{
	struct fcp_cmd_rsp_buf *tmp = NULL;
	struct fcp_cmd_rsp_buf *allocated_buf = NULL;
	struct lpfc_sli4_hdw_queue *hdwq = lpfc_buf->hdwq;
	struct list_head *buf_list = &hdwq->cmd_rsp_buf_list;
	unsigned long iflags;

	spin_lock_irqsave(&hdwq->hdwq_lock, iflags);

	if (likely(!list_empty(buf_list))) {
		/* break off 1 chunk from the list */
		tmp = list_first_entry(buf_list, struct fcp_cmd_rsp_buf,
				       list_node);
		list_move_tail(&tmp->list_node,
			       &lpfc_buf->dma_cmd_rsp_list);
	} else {
		/* allocate more; drop the lock around the allocations */
		spin_unlock_irqrestore(&hdwq->hdwq_lock, iflags);

		/*
		 * The CPU id is only a NUMA locality hint here, and this
		 * code may run preemptible once the lock is dropped, so
		 * use the raw variant rather than smp_processor_id().
		 */
		tmp = kmalloc_node(sizeof(*tmp), GFP_ATOMIC,
				   cpu_to_node(raw_smp_processor_id()));
		if (!tmp) {
			lpfc_printf_log(phba, KERN_INFO, LOG_SLI,
					"8355 error kmalloc memory for HDWQ "
					"%d %s\n",
					lpfc_buf->hdwq_no, __func__);
			return NULL;
		}

		tmp->fcp_cmnd = dma_pool_alloc(phba->lpfc_cmd_rsp_buf_pool,
					       GFP_ATOMIC,
					       &tmp->fcp_cmd_rsp_dma_handle);
		if (!tmp->fcp_cmnd) {
			lpfc_printf_log(phba, KERN_INFO, LOG_SLI,
					"8356 error pool_alloc memory for HDWQ "
					"%d %s\n",
					lpfc_buf->hdwq_no, __func__);
			kfree(tmp);
			return NULL;
		}

		/* response area immediately follows the command area */
		tmp->fcp_rsp = (struct fcp_rsp *)((uint8_t *)tmp->fcp_cmnd +
						  sizeof(struct fcp_cmnd));

		spin_lock_irqsave(&hdwq->hdwq_lock, iflags);
		list_add_tail(&tmp->list_node, &lpfc_buf->dma_cmd_rsp_list);
	}

	/* the buffer just moved/added is the last one on the I/O buf's list */
	allocated_buf = list_last_entry(&lpfc_buf->dma_cmd_rsp_list,
					struct fcp_cmd_rsp_buf,
					list_node);

	spin_unlock_irqrestore(&hdwq->hdwq_lock, iflags);

	return allocated_buf;
}
/**
 * lpfc_put_cmd_rsp_buf_per_hdwq - Put CMD/RSP buffers back into hdwq pool
 * @phba: The HBA for which this call is being executed.
 * @lpfc_buf: IO buf structure with the CMD/RSP buf
 *
 * This routine moves every CMD/RSP buffer on @lpfc_buf's
 * dma_cmd_rsp_list, in order, to the tail of the cmd_rsp_buf_list of the
 * hardware queue recorded in @lpfc_buf->hdwq, leaving the I/O buffer's
 * list empty.
 *
 * The hdwq_lock is taken with irqsave/irqrestore so that the caller's
 * interrupt state is preserved; unlocking never unconditionally
 * re-enables interrupts.
 *
 * Return codes:
 *   0 - Success
 *   -EINVAL - Error (the I/O buffer had no CMD/RSP buffer attached)
 **/
int
lpfc_put_cmd_rsp_buf_per_hdwq(struct lpfc_hba *phba,
			      struct lpfc_io_buf *lpfc_buf)
{
	int rc = 0;
	struct lpfc_sli4_hdw_queue *hdwq = lpfc_buf->hdwq;
	struct list_head *buf_list = &hdwq->cmd_rsp_buf_list;
	unsigned long iflags;

	spin_lock_irqsave(&hdwq->hdwq_lock, iflags);

	if (likely(!list_empty(&lpfc_buf->dma_cmd_rsp_list))) {
		/* hand all buffers back to the pool, preserving their order */
		list_splice_tail_init(&lpfc_buf->dma_cmd_rsp_list,
				      buf_list);
	} else {
		rc = -EINVAL;
	}

	spin_unlock_irqrestore(&hdwq->hdwq_lock, iflags);
	return rc;
}
/**
 * lpfc_free_cmd_rsp_buf_per_hdwq - Free all CMD/RSP chunks of hdwq pool
 * @phba: phba object
 * @hdwq: hdwq to cleanup cmd rsp buff resources on
 *
 * This routine walks the hdwq's cmd_rsp_buf_list and, for each buffer,
 * unlinks it, returns its DMA memory to lpfc_cmd_rsp_buf_pool and frees
 * the tracking structure. Buffers currently attached to an I/O buffer
 * are not on this list and are not touched.
 *
 * The hdwq_lock is taken with irqsave/irqrestore so that the caller's
 * interrupt state is preserved.
 *
 * Return codes:
 *   None
 **/
void
lpfc_free_cmd_rsp_buf_per_hdwq(struct lpfc_hba *phba,
			       struct lpfc_sli4_hdw_queue *hdwq)
{
	struct list_head *buf_list = &hdwq->cmd_rsp_buf_list;
	struct fcp_cmd_rsp_buf *list_entry = NULL;
	struct fcp_cmd_rsp_buf *tmp = NULL;
	unsigned long iflags;

	spin_lock_irqsave(&hdwq->hdwq_lock, iflags);

	/* Free cmd_rsp buf pool */
	list_for_each_entry_safe(list_entry, tmp,
				 buf_list,
				 list_node) {
		/* unlink first so the list never references freed memory */
		list_del(&list_entry->list_node);
		dma_pool_free(phba->lpfc_cmd_rsp_buf_pool,
			      list_entry->fcp_cmnd,
			      list_entry->fcp_cmd_rsp_dma_handle);
		kfree(list_entry);
	}

	spin_unlock_irqrestore(&hdwq->hdwq_lock, iflags);
}
...@@ -365,9 +365,18 @@ struct lpfc_io_buf { ...@@ -365,9 +365,18 @@ struct lpfc_io_buf {
/* Common fields */ /* Common fields */
struct list_head list; struct list_head list;
void *data; void *data;
dma_addr_t dma_handle; dma_addr_t dma_handle;
dma_addr_t dma_phys_sgl; dma_addr_t dma_phys_sgl;
struct sli4_sge *dma_sgl;
struct sli4_sge *dma_sgl; /* initial segment chunk */
/* linked list of extra sli4_hybrid_sge */
struct list_head dma_sgl_xtra_list;
/* list head for fcp_cmd_rsp buf */
struct list_head dma_cmd_rsp_list;
struct lpfc_iocbq cur_iocbq; struct lpfc_iocbq cur_iocbq;
struct lpfc_sli4_hdw_queue *hdwq; struct lpfc_sli4_hdw_queue *hdwq;
uint16_t hdwq_no; uint16_t hdwq_no;
......
...@@ -680,6 +680,13 @@ struct lpfc_sli4_hdw_queue { ...@@ -680,6 +680,13 @@ struct lpfc_sli4_hdw_queue {
uint32_t cpucheck_xmt_io[LPFC_CHECK_CPU_CNT]; uint32_t cpucheck_xmt_io[LPFC_CHECK_CPU_CNT];
uint32_t cpucheck_cmpl_io[LPFC_CHECK_CPU_CNT]; uint32_t cpucheck_cmpl_io[LPFC_CHECK_CPU_CNT];
#endif #endif
/* Per HDWQ pool resources */
struct list_head sgl_list;
struct list_head cmd_rsp_buf_list;
/* Lock for syncing Per HDWQ pool resources */
spinlock_t hdwq_lock;
}; };
#ifdef LPFC_HDWQ_LOCK_STAT #ifdef LPFC_HDWQ_LOCK_STAT
...@@ -1089,6 +1096,17 @@ int lpfc_sli4_post_status_check(struct lpfc_hba *); ...@@ -1089,6 +1096,17 @@ int lpfc_sli4_post_status_check(struct lpfc_hba *);
uint8_t lpfc_sli_config_mbox_subsys_get(struct lpfc_hba *, LPFC_MBOXQ_t *); uint8_t lpfc_sli_config_mbox_subsys_get(struct lpfc_hba *, LPFC_MBOXQ_t *);
uint8_t lpfc_sli_config_mbox_opcode_get(struct lpfc_hba *, LPFC_MBOXQ_t *); uint8_t lpfc_sli_config_mbox_opcode_get(struct lpfc_hba *, LPFC_MBOXQ_t *);
void lpfc_sli4_ras_dma_free(struct lpfc_hba *phba); void lpfc_sli4_ras_dma_free(struct lpfc_hba *phba);
/*
 * Per hardware queue pool helpers: get/put/free for extra SGL chunks and
 * for FCP CMD/RSP buffers.
 */
struct sli4_hybrid_sgl *lpfc_get_sgl_per_hdwq(struct lpfc_hba *phba,
struct lpfc_io_buf *buf);
struct fcp_cmd_rsp_buf *lpfc_get_cmd_rsp_buf_per_hdwq(struct lpfc_hba *phba,
struct lpfc_io_buf *buf);
int lpfc_put_sgl_per_hdwq(struct lpfc_hba *phba, struct lpfc_io_buf *buf);
int lpfc_put_cmd_rsp_buf_per_hdwq(struct lpfc_hba *phba,
struct lpfc_io_buf *buf);
void lpfc_free_sgl_per_hdwq(struct lpfc_hba *phba,
struct lpfc_sli4_hdw_queue *hdwq);
void lpfc_free_cmd_rsp_buf_per_hdwq(struct lpfc_hba *phba,
struct lpfc_sli4_hdw_queue *hdwq);
static inline void *lpfc_sli4_qe(struct lpfc_queue *q, uint16_t idx) static inline void *lpfc_sli4_qe(struct lpfc_queue *q, uint16_t idx)
{ {
return q->q_pgs[idx / q->entry_cnt_per_pg] + return q->q_pgs[idx / q->entry_cnt_per_pg] +
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment