Commit 01e23015 authored by Mintz, Yuval's avatar Mintz, Yuval Committed by David S. Miller

qede: Optimize aggregation information size

The driver needs to maintain a structure for each possible concurrent
open aggregation, but the structure storing that metadata is far from
optimized - the biggest waste is that it holds two buffer metadata
entries: one for a replacement buffer when the aggregation begins, and
another for holding the first aggregation's buffer after it begins [as
the firmware might still update it]. Those two can safely be united into
a single metadata structure.

struct qede_agg_info changes the following:

	/* size: 120, cachelines: 2, members: 9 */
	/* sum members: 114, holes: 1, sum holes: 4 */
	/* padding: 2 */
	/* paddings: 2, sum paddings: 8 */
	/* last cacheline: 56 bytes */
 -->
	/* size: 48, cachelines: 1, members: 9 */
	/* paddings: 1, sum paddings: 4 */
	/* last cacheline: 48 bytes */
Signed-off-by: default avatarYuval Mintz <Yuval.Mintz@cavium.com>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent f54b8cd6
...@@ -225,15 +225,30 @@ enum qede_agg_state { ...@@ -225,15 +225,30 @@ enum qede_agg_state {
}; };
struct qede_agg_info { struct qede_agg_info {
struct sw_rx_data replace_buf; /* rx_buf is a data buffer that can be placed / consumed from rx bd
dma_addr_t replace_buf_mapping; * chain. It has two purposes: We will preallocate the data buffer
struct sw_rx_data start_buf; * for each aggregation when we open the interface and will place this
dma_addr_t start_buf_mapping; * buffer on the rx-bd-ring when we receive TPA_START. We don't want
struct eth_fast_path_rx_tpa_start_cqe start_cqe; * to be in a state where allocation fails, as we can't reuse the
enum qede_agg_state agg_state; * consumer buffer in the rx-chain since FW may still be writing to it
* (since header needs to be modified for TPA).
* The second purpose is to keep a pointer to the bd buffer during
* aggregation.
*/
struct sw_rx_data buffer;
dma_addr_t buffer_mapping;
struct sk_buff *skb; struct sk_buff *skb;
int frag_id;
/* We need some structs from the start cookie until termination */
u16 vlan_tag; u16 vlan_tag;
u16 start_cqe_bd_len;
u8 start_cqe_placement_offset;
u8 state;
u8 frag_id;
u8 tunnel_type;
}; };
struct qede_rx_queue { struct qede_rx_queue {
......
...@@ -1058,7 +1058,7 @@ static int qede_fill_frag_skb(struct qede_dev *edev, ...@@ -1058,7 +1058,7 @@ static int qede_fill_frag_skb(struct qede_dev *edev,
struct qede_agg_info *tpa_info = &rxq->tpa_info[tpa_agg_index]; struct qede_agg_info *tpa_info = &rxq->tpa_info[tpa_agg_index];
struct sk_buff *skb = tpa_info->skb; struct sk_buff *skb = tpa_info->skb;
if (unlikely(tpa_info->agg_state != QEDE_AGG_STATE_START)) if (unlikely(tpa_info->state != QEDE_AGG_STATE_START))
goto out; goto out;
/* Add one frag and update the appropriate fields in the skb */ /* Add one frag and update the appropriate fields in the skb */
...@@ -1084,7 +1084,7 @@ static int qede_fill_frag_skb(struct qede_dev *edev, ...@@ -1084,7 +1084,7 @@ static int qede_fill_frag_skb(struct qede_dev *edev,
return 0; return 0;
out: out:
tpa_info->agg_state = QEDE_AGG_STATE_ERROR; tpa_info->state = QEDE_AGG_STATE_ERROR;
qede_recycle_rx_bd_ring(rxq, edev, 1); qede_recycle_rx_bd_ring(rxq, edev, 1);
return -ENOMEM; return -ENOMEM;
} }
...@@ -1096,8 +1096,8 @@ static void qede_tpa_start(struct qede_dev *edev, ...@@ -1096,8 +1096,8 @@ static void qede_tpa_start(struct qede_dev *edev,
struct qede_agg_info *tpa_info = &rxq->tpa_info[cqe->tpa_agg_index]; struct qede_agg_info *tpa_info = &rxq->tpa_info[cqe->tpa_agg_index];
struct eth_rx_bd *rx_bd_cons = qed_chain_consume(&rxq->rx_bd_ring); struct eth_rx_bd *rx_bd_cons = qed_chain_consume(&rxq->rx_bd_ring);
struct eth_rx_bd *rx_bd_prod = qed_chain_produce(&rxq->rx_bd_ring); struct eth_rx_bd *rx_bd_prod = qed_chain_produce(&rxq->rx_bd_ring);
struct sw_rx_data *replace_buf = &tpa_info->replace_buf; struct sw_rx_data *replace_buf = &tpa_info->buffer;
dma_addr_t mapping = tpa_info->replace_buf_mapping; dma_addr_t mapping = tpa_info->buffer_mapping;
struct sw_rx_data *sw_rx_data_cons; struct sw_rx_data *sw_rx_data_cons;
struct sw_rx_data *sw_rx_data_prod; struct sw_rx_data *sw_rx_data_prod;
enum pkt_hash_types rxhash_type; enum pkt_hash_types rxhash_type;
...@@ -1122,11 +1122,11 @@ static void qede_tpa_start(struct qede_dev *edev, ...@@ -1122,11 +1122,11 @@ static void qede_tpa_start(struct qede_dev *edev,
/* move partial skb from cons to pool (don't unmap yet) /* move partial skb from cons to pool (don't unmap yet)
* save mapping, incase we drop the packet later on. * save mapping, incase we drop the packet later on.
*/ */
tpa_info->start_buf = *sw_rx_data_cons; tpa_info->buffer = *sw_rx_data_cons;
mapping = HILO_U64(le32_to_cpu(rx_bd_cons->addr.hi), mapping = HILO_U64(le32_to_cpu(rx_bd_cons->addr.hi),
le32_to_cpu(rx_bd_cons->addr.lo)); le32_to_cpu(rx_bd_cons->addr.lo));
tpa_info->start_buf_mapping = mapping; tpa_info->buffer_mapping = mapping;
rxq->sw_rx_cons++; rxq->sw_rx_cons++;
/* set tpa state to start only if we are able to allocate skb /* set tpa state to start only if we are able to allocate skb
...@@ -1137,20 +1137,22 @@ static void qede_tpa_start(struct qede_dev *edev, ...@@ -1137,20 +1137,22 @@ static void qede_tpa_start(struct qede_dev *edev,
le16_to_cpu(cqe->len_on_first_bd)); le16_to_cpu(cqe->len_on_first_bd));
if (unlikely(!tpa_info->skb)) { if (unlikely(!tpa_info->skb)) {
DP_NOTICE(edev, "Failed to allocate SKB for gro\n"); DP_NOTICE(edev, "Failed to allocate SKB for gro\n");
tpa_info->agg_state = QEDE_AGG_STATE_ERROR; tpa_info->state = QEDE_AGG_STATE_ERROR;
goto cons_buf; goto cons_buf;
} }
skb_put(tpa_info->skb, le16_to_cpu(cqe->len_on_first_bd));
memcpy(&tpa_info->start_cqe, cqe, sizeof(tpa_info->start_cqe));
/* Start filling in the aggregation info */ /* Start filling in the aggregation info */
skb_put(tpa_info->skb, le16_to_cpu(cqe->len_on_first_bd));
tpa_info->frag_id = 0; tpa_info->frag_id = 0;
tpa_info->agg_state = QEDE_AGG_STATE_START; tpa_info->state = QEDE_AGG_STATE_START;
rxhash = qede_get_rxhash(edev, cqe->bitfields, rxhash = qede_get_rxhash(edev, cqe->bitfields,
cqe->rss_hash, &rxhash_type); cqe->rss_hash, &rxhash_type);
skb_set_hash(tpa_info->skb, rxhash, rxhash_type); skb_set_hash(tpa_info->skb, rxhash, rxhash_type);
/* Store some information from first CQE */
tpa_info->start_cqe_placement_offset = cqe->placement_offset;
tpa_info->start_cqe_bd_len = le16_to_cpu(cqe->len_on_first_bd);
if ((le16_to_cpu(cqe->pars_flags.flags) >> if ((le16_to_cpu(cqe->pars_flags.flags) >>
PARSING_AND_ERR_FLAGS_TAG8021QEXIST_SHIFT) & PARSING_AND_ERR_FLAGS_TAG8021QEXIST_SHIFT) &
PARSING_AND_ERR_FLAGS_TAG8021QEXIST_MASK) PARSING_AND_ERR_FLAGS_TAG8021QEXIST_MASK)
...@@ -1169,7 +1171,7 @@ static void qede_tpa_start(struct qede_dev *edev, ...@@ -1169,7 +1171,7 @@ static void qede_tpa_start(struct qede_dev *edev,
if (unlikely(cqe->ext_bd_len_list[1])) { if (unlikely(cqe->ext_bd_len_list[1])) {
DP_ERR(edev, DP_ERR(edev,
"Unlikely - got a TPA aggregation with more than one ext_bd_len_list entry in the TPA start\n"); "Unlikely - got a TPA aggregation with more than one ext_bd_len_list entry in the TPA start\n");
tpa_info->agg_state = QEDE_AGG_STATE_ERROR; tpa_info->state = QEDE_AGG_STATE_ERROR;
} }
} }
...@@ -1276,7 +1278,7 @@ static void qede_tpa_end(struct qede_dev *edev, ...@@ -1276,7 +1278,7 @@ static void qede_tpa_end(struct qede_dev *edev,
DP_ERR(edev, DP_ERR(edev,
"Strange - TPA emd with more than a single len_list entry\n"); "Strange - TPA emd with more than a single len_list entry\n");
if (unlikely(tpa_info->agg_state != QEDE_AGG_STATE_START)) if (unlikely(tpa_info->state != QEDE_AGG_STATE_START))
goto err; goto err;
/* Sanity */ /* Sanity */
...@@ -1290,14 +1292,9 @@ static void qede_tpa_end(struct qede_dev *edev, ...@@ -1290,14 +1292,9 @@ static void qede_tpa_end(struct qede_dev *edev,
le16_to_cpu(cqe->total_packet_len), skb->len); le16_to_cpu(cqe->total_packet_len), skb->len);
memcpy(skb->data, memcpy(skb->data,
page_address(tpa_info->start_buf.data) + page_address(tpa_info->buffer.data) +
tpa_info->start_cqe.placement_offset + tpa_info->start_cqe_placement_offset +
tpa_info->start_buf.page_offset, tpa_info->buffer.page_offset, tpa_info->start_cqe_bd_len);
le16_to_cpu(tpa_info->start_cqe.len_on_first_bd));
/* Recycle [mapped] start buffer for the next replacement */
tpa_info->replace_buf = tpa_info->start_buf;
tpa_info->replace_buf_mapping = tpa_info->start_buf_mapping;
/* Finalize the SKB */ /* Finalize the SKB */
skb->protocol = eth_type_trans(skb, edev->ndev); skb->protocol = eth_type_trans(skb, edev->ndev);
...@@ -1310,18 +1307,11 @@ static void qede_tpa_end(struct qede_dev *edev, ...@@ -1310,18 +1307,11 @@ static void qede_tpa_end(struct qede_dev *edev,
qede_gro_receive(edev, fp, skb, tpa_info->vlan_tag); qede_gro_receive(edev, fp, skb, tpa_info->vlan_tag);
tpa_info->agg_state = QEDE_AGG_STATE_NONE; tpa_info->state = QEDE_AGG_STATE_NONE;
return; return;
err: err:
/* The BD starting the aggregation is still mapped; Re-use it for tpa_info->state = QEDE_AGG_STATE_NONE;
* future aggregations [as replacement buffer]
*/
memcpy(&tpa_info->replace_buf, &tpa_info->start_buf,
sizeof(struct sw_rx_data));
tpa_info->replace_buf_mapping = tpa_info->start_buf_mapping;
tpa_info->start_buf.data = NULL;
tpa_info->agg_state = QEDE_AGG_STATE_NONE;
dev_kfree_skb_any(tpa_info->skb); dev_kfree_skb_any(tpa_info->skb);
tpa_info->skb = NULL; tpa_info->skb = NULL;
} }
...@@ -2823,7 +2813,7 @@ static void qede_free_sge_mem(struct qede_dev *edev, struct qede_rx_queue *rxq) ...@@ -2823,7 +2813,7 @@ static void qede_free_sge_mem(struct qede_dev *edev, struct qede_rx_queue *rxq)
for (i = 0; i < ETH_TPA_MAX_AGGS_NUM; i++) { for (i = 0; i < ETH_TPA_MAX_AGGS_NUM; i++) {
struct qede_agg_info *tpa_info = &rxq->tpa_info[i]; struct qede_agg_info *tpa_info = &rxq->tpa_info[i];
struct sw_rx_data *replace_buf = &tpa_info->replace_buf; struct sw_rx_data *replace_buf = &tpa_info->buffer;
if (replace_buf->data) { if (replace_buf->data) {
dma_unmap_page(&edev->pdev->dev, dma_unmap_page(&edev->pdev->dev,
...@@ -2905,7 +2895,7 @@ static int qede_alloc_sge_mem(struct qede_dev *edev, struct qede_rx_queue *rxq) ...@@ -2905,7 +2895,7 @@ static int qede_alloc_sge_mem(struct qede_dev *edev, struct qede_rx_queue *rxq)
for (i = 0; i < ETH_TPA_MAX_AGGS_NUM; i++) { for (i = 0; i < ETH_TPA_MAX_AGGS_NUM; i++) {
struct qede_agg_info *tpa_info = &rxq->tpa_info[i]; struct qede_agg_info *tpa_info = &rxq->tpa_info[i];
struct sw_rx_data *replace_buf = &tpa_info->replace_buf; struct sw_rx_data *replace_buf = &tpa_info->buffer;
replace_buf->data = alloc_pages(GFP_ATOMIC, 0); replace_buf->data = alloc_pages(GFP_ATOMIC, 0);
if (unlikely(!replace_buf->data)) { if (unlikely(!replace_buf->data)) {
...@@ -2923,10 +2913,9 @@ static int qede_alloc_sge_mem(struct qede_dev *edev, struct qede_rx_queue *rxq) ...@@ -2923,10 +2913,9 @@ static int qede_alloc_sge_mem(struct qede_dev *edev, struct qede_rx_queue *rxq)
} }
replace_buf->mapping = mapping; replace_buf->mapping = mapping;
tpa_info->replace_buf.page_offset = 0; tpa_info->buffer.page_offset = 0;
tpa_info->buffer_mapping = mapping;
tpa_info->replace_buf_mapping = mapping; tpa_info->state = QEDE_AGG_STATE_NONE;
tpa_info->agg_state = QEDE_AGG_STATE_NONE;
} }
return 0; return 0;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment