Commit 059ba072, authored by Achiad Shochat, committed by David S. Miller

net/mlx5e: Avoid TX CQE generation if more xmit packets expected

In order to save PCI BW consumed by TX CQEs and to reduce the amount of
CPU cache misses caused by TX CQE reading, we request TX CQE generation
only when skb->xmit_more=0.

As a consequence of the above, a single TX CQE may now indicate the
transmission completion of multiple TX SKBs.

This also handles a problem introduced in commit b1b8105ebf41 "net/mlx5e:
Support NETIF_F_SG" where we didn't ask for NOP completions while the
driver didn't have the proper code to handle this case.

Fixes: b1b8105ebf41 ('net/mlx5e: Support NETIF_F_SG')
Signed-off-by: Achiad Shochat <achiad@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 9fc59306
...@@ -210,7 +210,6 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb) ...@@ -210,7 +210,6 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb)
cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | opcode); cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | opcode);
cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
cseg->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;
sq->skb[pi] = skb; sq->skb[pi] = skb;
...@@ -225,8 +224,10 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb) ...@@ -225,8 +224,10 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb)
sq->stats.stopped++; sq->stats.stopped++;
} }
if (!skb->xmit_more || netif_xmit_stopped(sq->txq)) if (!skb->xmit_more || netif_xmit_stopped(sq->txq)) {
cseg->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;
mlx5e_tx_notify_hw(sq, wqe); mlx5e_tx_notify_hw(sq, wqe);
}
/* fill sq edge with nops to avoid wqe wrap around */ /* fill sq edge with nops to avoid wqe wrap around */
while ((sq->pc & wq->sz_m1) > sq->edge) while ((sq->pc & wq->sz_m1) > sq->edge)
...@@ -280,14 +281,22 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq) ...@@ -280,14 +281,22 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq)
for (i = 0; i < MLX5E_TX_CQ_POLL_BUDGET; i++) { for (i = 0; i < MLX5E_TX_CQ_POLL_BUDGET; i++) {
struct mlx5_cqe64 *cqe; struct mlx5_cqe64 *cqe;
struct sk_buff *skb; u16 wqe_counter;
u16 ci; bool last_wqe;
int j;
cqe = mlx5e_get_cqe(cq); cqe = mlx5e_get_cqe(cq);
if (!cqe) if (!cqe)
break; break;
wqe_counter = be16_to_cpu(cqe->wqe_counter);
do {
struct sk_buff *skb;
u16 ci;
int j;
last_wqe = (sqcc == wqe_counter);
ci = sqcc & sq->wq.sz_m1; ci = sqcc & sq->wq.sz_m1;
skb = sq->skb[ci]; skb = sq->skb[ci];
...@@ -303,13 +312,15 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq) ...@@ -303,13 +312,15 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq)
mlx5e_dma_get(sq, dma_fifo_cc, &addr, &size); mlx5e_dma_get(sq, dma_fifo_cc, &addr, &size);
dma_fifo_cc++; dma_fifo_cc++;
dma_unmap_single(sq->pdev, addr, size, DMA_TO_DEVICE); dma_unmap_single(sq->pdev, addr, size,
DMA_TO_DEVICE);
} }
npkts++; npkts++;
nbytes += MLX5E_TX_SKB_CB(skb)->num_bytes; nbytes += MLX5E_TX_SKB_CB(skb)->num_bytes;
sqcc += MLX5E_TX_SKB_CB(skb)->num_wqebbs; sqcc += MLX5E_TX_SKB_CB(skb)->num_wqebbs;
dev_kfree_skb(skb); dev_kfree_skb(skb);
} while (!last_wqe);
} }
mlx5_cqwq_update_db_record(&cq->wq); mlx5_cqwq_update_db_record(&cq->wq);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment