Commit f0ab34f0, authored by Yevgeny Petrilin, committed by David S. Miller

net/mlx4_en: using non collapsed CQ on TX

Move to the regular (non-collapsed) Completion Queue implementation.
The completion for each transmitted packet is written to a new entry.
Signed-off-by: Yevgeny Petrilin <yevgenyp@mellanox.co.il>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 0d9fdaa9
...@@ -51,10 +51,7 @@ int mlx4_en_create_cq(struct mlx4_en_priv *priv, ...@@ -51,10 +51,7 @@ int mlx4_en_create_cq(struct mlx4_en_priv *priv,
int err; int err;
cq->size = entries; cq->size = entries;
if (mode == RX) cq->buf_size = cq->size * sizeof(struct mlx4_cqe);
cq->buf_size = cq->size * sizeof(struct mlx4_cqe);
else
cq->buf_size = sizeof(struct mlx4_cqe);
cq->ring = ring; cq->ring = ring;
cq->is_tx = mode; cq->is_tx = mode;
...@@ -120,7 +117,7 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq, ...@@ -120,7 +117,7 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq,
cq->size = priv->rx_ring[cq->ring].actual_size; cq->size = priv->rx_ring[cq->ring].actual_size;
err = mlx4_cq_alloc(mdev->dev, cq->size, &cq->wqres.mtt, &mdev->priv_uar, err = mlx4_cq_alloc(mdev->dev, cq->size, &cq->wqres.mtt, &mdev->priv_uar,
cq->wqres.db.dma, &cq->mcq, cq->vector, cq->is_tx); cq->wqres.db.dma, &cq->mcq, cq->vector, 0);
if (err) if (err)
return err; return err;
......
...@@ -307,59 +307,60 @@ int mlx4_en_free_tx_buf(struct net_device *dev, struct mlx4_en_tx_ring *ring) ...@@ -307,59 +307,60 @@ int mlx4_en_free_tx_buf(struct net_device *dev, struct mlx4_en_tx_ring *ring)
return cnt; return cnt;
} }
static void mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq) static void mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq)
{ {
struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_priv *priv = netdev_priv(dev);
struct mlx4_cq *mcq = &cq->mcq; struct mlx4_cq *mcq = &cq->mcq;
struct mlx4_en_tx_ring *ring = &priv->tx_ring[cq->ring]; struct mlx4_en_tx_ring *ring = &priv->tx_ring[cq->ring];
struct mlx4_cqe *cqe = cq->buf; struct mlx4_cqe *cqe;
u16 index; u16 index;
u16 new_index; u16 new_index, ring_index;
u32 txbbs_skipped = 0; u32 txbbs_skipped = 0;
u32 cq_last_sav; u32 cons_index = mcq->cons_index;
int size = cq->size;
/* index always points to the first TXBB of the last polled descriptor */ u32 size_mask = ring->size_mask;
index = ring->cons & ring->size_mask; struct mlx4_cqe *buf = cq->buf;
new_index = be16_to_cpu(cqe->wqe_index) & ring->size_mask;
if (index == new_index)
return;
if (!priv->port_up) if (!priv->port_up)
return; return;
/* index = cons_index & size_mask;
* We use a two-stage loop: cqe = &buf[index];
* - the first samples the HW-updated CQE ring_index = ring->cons & size_mask;
* - the second frees TXBBs until the last sample
* This lets us amortize CQE cache misses, while still polling the CQ /* Process all completed CQEs */
* until is quiescent. while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK,
*/ cons_index & size)) {
cq_last_sav = mcq->cons_index; /*
do { * make sure we read the CQE after we read the
* ownership bit
*/
rmb();
/* Skip over last polled CQE */
new_index = be16_to_cpu(cqe->wqe_index) & size_mask;
do { do {
/* Skip over last polled CQE */
index = (index + ring->last_nr_txbb) & ring->size_mask;
txbbs_skipped += ring->last_nr_txbb; txbbs_skipped += ring->last_nr_txbb;
ring_index = (ring_index + ring->last_nr_txbb) & size_mask;
/* Poll next CQE */ /* free next descriptor */
ring->last_nr_txbb = mlx4_en_free_tx_desc( ring->last_nr_txbb = mlx4_en_free_tx_desc(
priv, ring, index, priv, ring, ring_index,
!!((ring->cons + txbbs_skipped) & !!((ring->cons + txbbs_skipped) &
ring->size)); ring->size));
++mcq->cons_index; } while (ring_index != new_index);
} while (index != new_index); ++cons_index;
index = cons_index & size_mask;
cqe = &buf[index];
}
new_index = be16_to_cpu(cqe->wqe_index) & ring->size_mask;
} while (index != new_index);
AVG_PERF_COUNTER(priv->pstats.tx_coal_avg,
(u32) (mcq->cons_index - cq_last_sav));
/* /*
* To prevent CQ overflow we first update CQ consumer and only then * To prevent CQ overflow we first update CQ consumer and only then
* the ring consumer. * the ring consumer.
*/ */
mcq->cons_index = cons_index;
mlx4_cq_set_ci(mcq); mlx4_cq_set_ci(mcq);
wmb(); wmb();
ring->cons += txbbs_skipped; ring->cons += txbbs_skipped;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment