Commit 9afbee3d authored by James Smart, committed by Martin K. Petersen

scsi: lpfc: Reduce memory footprint for lpfc_queue

Currently the driver maintains a sideband structure with a pointer for
each queue element. At 8 bytes per pointer, up to 4k elements per queue,
and hundreds of queues, this table can consume several megabytes per adapter.

Convert the driver to an access routine that computes an element's address
from its index, rather than looking it up in the pointer table.
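
To put rough numbers on it (illustrative, using the bounds above): a 4k-entry
queue needs 4096 x 8 = 32 KB of pointer table, so a few hundred queues consume
several megabytes. Indexing by page instead needs only one pointer per backing
page; with 4 KB pages that is typically a few dozen pointers per queue,
depending on entry size.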
Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <jsmart2021@gmail.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
parent 9a66d990
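
The access routine added at the bottom of the diff (lpfc_sli4_qe) turns each
lookup into page-select-plus-offset arithmetic over a per-page pointer array.
Below is a minimal standalone sketch of the same indexing scheme, with
hypothetical toy_* names standing in for the driver structures; the field
names mirror the ones the diff introduces.

	#include <stddef.h>
	#include <stdint.h>

	/* Toy stand-in for struct lpfc_queue: entries live in fixed-size pages. */
	struct toy_queue {
		uint32_t entry_size;		/* bytes per queue entry */
		uint32_t entry_cnt_per_pg;	/* entries that fit in one page */
		void **q_pgs;			/* one pointer per backing page */
	};

	/* Same arithmetic as lpfc_sli4_qe(): pick the page, then offset into it. */
	static inline void *toy_qe(const struct toy_queue *q, uint16_t idx)
	{
		return (char *)q->q_pgs[idx / q->entry_cnt_per_pg] +
		       (size_t)q->entry_size * (idx % q->entry_cnt_per_pg);
	}

The kernel version does the same arithmetic directly on void pointers (a GCC
extension), so no cast is needed there. Only the q_pgs array scales with queue
size, and it holds one pointer per page rather than one per element.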
@@ -4135,7 +4135,7 @@ lpfc_idiag_queacc_read_qe(char *pbuffer, int len, struct lpfc_queue *pque,
 			"QE-INDEX[%04d]:\n", index);
 	offset = 0;
-	pentry = pque->qe[index].address;
+	pentry = lpfc_sli4_qe(pque, index);
 	while (esize > 0) {
 		len += snprintf(pbuffer+len, LPFC_QUE_ACC_BUF_SIZE-len,
 				"%08x ", *pentry);
@@ -4485,7 +4485,7 @@ lpfc_idiag_queacc_write(struct file *file, const char __user *buf,
 	pque = (struct lpfc_queue *)idiag.ptr_private;
 	if (offset > pque->entry_size/sizeof(uint32_t) - 1)
 		goto error_out;
-	pentry = pque->qe[index].address;
+	pentry = lpfc_sli4_qe(pque, index);
 	pentry += offset;
 	if (idiag.cmd.opcode == LPFC_IDIAG_CMD_QUEACC_WR)
 		*pentry = value;
...
@@ -345,7 +345,7 @@ lpfc_debug_dump_qe(struct lpfc_queue *q, uint32_t idx)
 	esize = q->entry_size;
 	qe_word_cnt = esize / sizeof(uint32_t);
-	pword = q->qe[idx].address;
+	pword = lpfc_sli4_qe(q, idx);

 	len = 0;
 	len += snprintf(line_buf+len, LPFC_LBUF_SZ-len, "QE[%04d]: ", idx);
...
@@ -151,7 +151,7 @@ lpfc_sli4_wq_put(struct lpfc_queue *q, union lpfc_wqe128 *wqe)
 	/* sanity check on queue memory */
 	if (unlikely(!q))
 		return -ENOMEM;
-	temp_wqe = q->qe[q->host_index].wqe;
+	temp_wqe = lpfc_sli4_qe(q, q->host_index);

 	/* If the host has not yet processed the next entry then we are done */
 	idx = ((q->host_index + 1) % q->entry_count);
@@ -271,7 +271,7 @@ lpfc_sli4_mq_put(struct lpfc_queue *q, struct lpfc_mqe *mqe)
 	/* sanity check on queue memory */
 	if (unlikely(!q))
 		return -ENOMEM;
-	temp_mqe = q->qe[q->host_index].mqe;
+	temp_mqe = lpfc_sli4_qe(q, q->host_index);

 	/* If the host has not yet processed the next entry then we are done */
 	if (((q->host_index + 1) % q->entry_count) == q->hba_index)
@@ -331,7 +331,7 @@ lpfc_sli4_eq_get(struct lpfc_queue *q)
 	/* sanity check on queue memory */
 	if (unlikely(!q))
 		return NULL;
-	eqe = q->qe[q->host_index].eqe;
+	eqe = lpfc_sli4_qe(q, q->host_index);

 	/* If the next EQE is not valid then we are done */
 	if (bf_get_le32(lpfc_eqe_valid, eqe) != q->qe_valid)
@@ -545,7 +545,7 @@ lpfc_sli4_cq_get(struct lpfc_queue *q)
 	/* sanity check on queue memory */
 	if (unlikely(!q))
 		return NULL;
-	cqe = q->qe[q->host_index].cqe;
+	cqe = lpfc_sli4_qe(q, q->host_index);

 	/* If the next CQE is not valid then we are done */
 	if (bf_get_le32(lpfc_cqe_valid, cqe) != q->qe_valid)
@@ -667,8 +667,8 @@ lpfc_sli4_rq_put(struct lpfc_queue *hq, struct lpfc_queue *dq,
 		return -ENOMEM;
 	hq_put_index = hq->host_index;
 	dq_put_index = dq->host_index;
-	temp_hrqe = hq->qe[hq_put_index].rqe;
-	temp_drqe = dq->qe[dq_put_index].rqe;
+	temp_hrqe = lpfc_sli4_qe(hq, hq_put_index);
+	temp_drqe = lpfc_sli4_qe(dq, dq_put_index);
 	if (hq->type != LPFC_HRQ || dq->type != LPFC_DRQ)
 		return -EINVAL;
@@ -7878,8 +7878,9 @@ lpfc_sli4_mbox_completions_pending(struct lpfc_hba *phba)
 	mcq = phba->sli4_hba.mbx_cq;
 	idx = mcq->hba_index;
 	qe_valid = mcq->qe_valid;
-	while (bf_get_le32(lpfc_cqe_valid, mcq->qe[idx].cqe) == qe_valid) {
-		mcqe = (struct lpfc_mcqe *)mcq->qe[idx].cqe;
+	while (bf_get_le32(lpfc_cqe_valid,
+	       (struct lpfc_cqe *)lpfc_sli4_qe(mcq, idx)) == qe_valid) {
+		mcqe = (struct lpfc_mcqe *)(lpfc_sli4_qe(mcq, idx));
 		if (bf_get_le32(lpfc_trailer_completed, mcqe) &&
 		    (!bf_get_le32(lpfc_trailer_async, mcqe))) {
 			pending_completions = true;
@@ -14507,24 +14508,22 @@ lpfc_sli4_queue_alloc(struct lpfc_hba *phba, uint32_t page_size,
 {
 	struct lpfc_queue *queue;
 	struct lpfc_dmabuf *dmabuf;
-	int x, total_qe_count;
-	void *dma_pointer;
 	uint32_t hw_page_size = phba->sli4_hba.pc_sli4_params.if_page_sz;
+	uint16_t x, pgcnt;

 	if (!phba->sli4_hba.pc_sli4_params.supported)
 		hw_page_size = page_size;

+	pgcnt = ALIGN(entry_size * entry_count, hw_page_size) / hw_page_size;
+	/* If needed, Adjust page count to match the max the adapter supports */
+	if (pgcnt > phba->sli4_hba.pc_sli4_params.wqpcnt)
+		pgcnt = phba->sli4_hba.pc_sli4_params.wqpcnt;
+
 	queue = kzalloc(sizeof(struct lpfc_queue) +
-			(sizeof(union sli4_qe) * entry_count), GFP_KERNEL);
+			(sizeof(void *) * pgcnt), GFP_KERNEL);
 	if (!queue)
 		return NULL;
-	queue->page_count = (ALIGN(entry_size * entry_count,
-			hw_page_size))/hw_page_size;
-	/* If needed, Adjust page count to match the max the adapter supports */
-	if (phba->sli4_hba.pc_sli4_params.wqpcnt &&
-	    (queue->page_count > phba->sli4_hba.pc_sli4_params.wqpcnt))
-		queue->page_count = phba->sli4_hba.pc_sli4_params.wqpcnt;

 	INIT_LIST_HEAD(&queue->list);
 	INIT_LIST_HEAD(&queue->wq_list);
@@ -14536,12 +14535,15 @@ lpfc_sli4_queue_alloc(struct lpfc_hba *phba, uint32_t page_size,
 	/* Set queue parameters now.  If the system cannot provide memory
 	 * resources, the free routine needs to know what was allocated.
 	 */
+	queue->page_count = pgcnt;
+	queue->q_pgs = (void **)&queue[1];
+	queue->entry_cnt_per_pg = hw_page_size / entry_size;
 	queue->entry_size = entry_size;
 	queue->entry_count = entry_count;
 	queue->page_size = hw_page_size;
 	queue->phba = phba;

-	for (x = 0, total_qe_count = 0; x < queue->page_count; x++) {
+	for (x = 0; x < queue->page_count; x++) {
 		dmabuf = kzalloc(sizeof(struct lpfc_dmabuf), GFP_KERNEL);
 		if (!dmabuf)
 			goto out_fail;
@@ -14554,13 +14556,8 @@ lpfc_sli4_queue_alloc(struct lpfc_hba *phba, uint32_t page_size,
 		}
 		dmabuf->buffer_tag = x;
 		list_add_tail(&dmabuf->list, &queue->page_list);
-		/* initialize queue's entry array */
-		dma_pointer = dmabuf->virt;
-		for (; total_qe_count < entry_count &&
-		     dma_pointer < (hw_page_size + dmabuf->virt);
-		     total_qe_count++, dma_pointer += entry_size) {
-			queue->qe[total_qe_count].address = dma_pointer;
-		}
+		/* use lpfc_sli4_qe to index a particular entry in this page */
+		queue->q_pgs[x] = dmabuf->virt;
 	}
 	INIT_WORK(&queue->irqwork, lpfc_sli4_hba_process_cq);
 	INIT_WORK(&queue->spwork, lpfc_sli4_sp_process_cq);
@@ -14575,6 +14572,12 @@ lpfc_sli4_queue_alloc(struct lpfc_hba *phba, uint32_t page_size,
 	return NULL;
 }

+inline void *lpfc_sli4_qe(struct lpfc_queue *q, uint16_t idx)
+{
+	return q->q_pgs[idx / q->entry_cnt_per_pg] +
+		(q->entry_size * (idx % q->entry_cnt_per_pg));
+}
+
 /**
  * lpfc_dual_chute_pci_bar_map - Map pci base address register to host memory
  * @phba: HBA structure that indicates port to create a queue on.
...
@@ -117,21 +117,6 @@ enum lpfc_sli4_queue_subtype {
 	LPFC_USOL
 };

-union sli4_qe {
-	void *address;
-	struct lpfc_eqe *eqe;
-	struct lpfc_cqe *cqe;
-	struct lpfc_mcqe *mcqe;
-	struct lpfc_wcqe_complete *wcqe_complete;
-	struct lpfc_wcqe_release *wcqe_release;
-	struct sli4_wcqe_xri_aborted *wcqe_xri_aborted;
-	struct lpfc_rcqe_complete *rcqe_complete;
-	struct lpfc_mqe *mqe;
-	union lpfc_wqe *wqe;
-	union lpfc_wqe128 *wqe128;
-	struct lpfc_rqe *rqe;
-};
-
 /* RQ buffer list */
 struct lpfc_rqb {
 	uint16_t entry_count;	/* Current number of RQ slots */
@@ -157,6 +142,7 @@ struct lpfc_queue {
 	struct list_head cpu_list;
 	uint32_t entry_count;	/* Number of entries to support on the queue */
 	uint32_t entry_size;	/* Size of each queue entry. */
+	uint32_t entry_cnt_per_pg;
 	uint32_t notify_interval; /* Queue Notification Interval
 				   * For chip->host queues (EQ, CQ, RQ):
 				   * specifies the interval (number of
@@ -254,7 +240,7 @@ struct lpfc_queue {
 	uint16_t last_cpu;	/* most recent cpu */
 	uint8_t qe_valid;
 	struct lpfc_queue *assoc_qp;
-	union sli4_qe qe[1];	/* array to index entries (must be last) */
+	void **q_pgs;		/* array to index entries per page */
 };

 struct lpfc_sli4_link {
@@ -1092,3 +1078,4 @@ int lpfc_sli4_post_status_check(struct lpfc_hba *);
 uint8_t lpfc_sli_config_mbox_subsys_get(struct lpfc_hba *, LPFC_MBOXQ_t *);
 uint8_t lpfc_sli_config_mbox_opcode_get(struct lpfc_hba *, LPFC_MBOXQ_t *);
 void lpfc_sli4_ras_dma_free(struct lpfc_hba *phba);
+inline void *lpfc_sli4_qe(struct lpfc_queue *, uint16_t);