Commit 76c31e5f authored by Aya Levin, committed by Saeed Mahameed

net/mlx5e: Use FW limitation for max MPW WQEBBs

Calculate the maximal count of MPW WQEBBs when the SQ is created and
store it on the SQ. Remove MLX5E_TX_MPW_MAX_NUM_DS and
MLX5E_TX_MPW_MAX_WQEBBS. Update mlx5e_tx_mpwqe_is_full() and
mlx5e_xdp_mpqwe_is_full() accordingly.
Signed-off-by: Aya Levin <ayal@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
parent c27bd171
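
The idea behind the change: the MPW WQEBB cap is no longer a compile-time constant, it is derived once from the firmware-reported max_wqe_sz_sq when the SQ is created and then consulted in the datapath. A minimal userspace sketch of that flow, assuming the standard mlx5 constants (64-byte WQEBB, 16-byte data segment, at most 16 WQEBBs per WQE); the helper names and the 1024-byte max_wqe_sz_sq value below are illustrative stand-ins, not the driver's code:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define SEND_WQE_BB        64                       /* bytes per WQE basic block */
#define SEND_WQE_DS        16                       /* bytes per data segment */
#define WQEBB_NUM_DS       (SEND_WQE_BB / SEND_WQE_DS)
#define WQE_MAX_WQEBBS     16
#define CACHE_LINE_BYTES   64                       /* assume 64-byte L1 lines */

/* Derive the per-SQ WQE size from the FW capability (max_wqe_sz_sq). */
static uint16_t max_sq_wqebbs(uint16_t max_wqe_sz_sq)
{
	uint16_t n = max_wqe_sz_sq / SEND_WQE_BB;

	return n < WQE_MAX_WQEBBS ? n : WQE_MAX_WQEBBS;
}

/* Cap it so the DS count fits the 6-bit field and the WQE stays cache-aligned. */
static uint16_t sw_max_sq_mpw_wqebbs(uint16_t wqebbs)
{
	uint16_t bound = CACHE_LINE_BYTES < 128 ? WQE_MAX_WQEBBS - 1
						: WQE_MAX_WQEBBS - 2;

	return wqebbs < bound ? wqebbs : bound;
}

/* Datapath check: the MPWQE session is full once every DS slot is used. */
static bool tx_mpwqe_is_full(uint16_t ds_count, uint16_t max_mpw_wqebbs)
{
	return ds_count == max_mpw_wqebbs * WQEBB_NUM_DS;
}

int main(void)
{
	uint16_t wqebbs = max_sq_wqebbs(1024);        /* 16 WQEBBs */
	uint16_t mpw = sw_max_sq_mpw_wqebbs(wqebbs);  /* capped to 15 */
	uint16_t max_ds = mpw * WQEBB_NUM_DS;         /* 60, fits the 6-bit field */

	assert(max_ds <= 63);
	printf("wqebbs=%u mpw=%u max_ds=%u full@60=%d\n",
	       wqebbs, mpw, max_ds, tx_mpwqe_is_full(60, mpw));
	return 0;
}
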
@@ -172,8 +172,9 @@ struct page_pool;
 #define MLX5E_KLM_ENTRIES_PER_WQE(wqe_size)\
 	ALIGN_DOWN(MLX5E_KLM_MAX_ENTRIES_PER_WQE(wqe_size), MLX5_UMR_KLM_ALIGNMENT)

-#define MLX5E_MAX_KLM_PER_WQE \
-	MLX5E_KLM_ENTRIES_PER_WQE(MLX5E_TX_MPW_MAX_NUM_DS << MLX5_MKEY_BSF_OCTO_SIZE)
+#define MLX5E_MAX_KLM_PER_WQE(mdev) \
+	MLX5E_KLM_ENTRIES_PER_WQE(mlx5e_get_sw_max_sq_mpw_wqebbs(mlx5e_get_max_sq_wqebbs(mdev)) \
+				  << MLX5_MKEY_BSF_OCTO_SIZE)

 #define MLX5E_MSG_LEVEL			NETIF_MSG_LINK
@@ -231,6 +232,22 @@ static inline u16 mlx5e_get_max_sq_wqebbs(struct mlx5_core_dev *mdev)
 		     MLX5_CAP_GEN(mdev, max_wqe_sz_sq) / MLX5_SEND_WQE_BB);
 }

+static inline u16 mlx5e_get_sw_max_sq_mpw_wqebbs(u16 max_sq_wqebbs)
+{
+	/* The return value will be multiplied by MLX5_SEND_WQEBB_NUM_DS.
+	 * Since max_sq_wqebbs may be up to MLX5_SEND_WQE_MAX_WQEBBS == 16,
+	 * see mlx5e_get_max_sq_wqebbs(), the multiplication (16 * 4 == 64)
+	 * overflows the 6-bit DS field of Ctrl Segment. Use a bound lower
+	 * than MLX5_SEND_WQE_MAX_WQEBBS to let a full-session WQE be
+	 * cache-aligned.
+	 */
+#if L1_CACHE_BYTES < 128
+	return min_t(u16, max_sq_wqebbs, MLX5_SEND_WQE_MAX_WQEBBS - 1);
+#else
+	return min_t(u16, max_sq_wqebbs, MLX5_SEND_WQE_MAX_WQEBBS - 2);
+#endif
+}
+
 struct mlx5e_tx_wqe {
 	struct mlx5_wqe_ctrl_seg ctrl;
 	struct mlx5_wqe_eth_seg eth;
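
The bound chosen above has two constraints behind it: the Ctrl Segment DS count is a 6-bit field (at most 63 data segments, while 16 WQEBBs would need 16 * 4 = 64), and a full session should end on a cache-line boundary (15 WQEBBs are 960 bytes, a multiple of 64; 14 WQEBBs are 896 bytes, a multiple of 128). A small compile-time check of that arithmetic, assuming the usual constants; the defines are local stand-ins, not the mlx5 headers:

#include <assert.h>   /* static_assert (C11) */

#define WQE_BB      64                     /* bytes per send WQE basic block */
#define WQE_DS      16                     /* bytes per data segment */
#define BB_NUM_DS   (WQE_BB / WQE_DS)      /* 4 data segments per WQEBB */
#define MAX_WQEBBS  16

/* 16 WQEBBs would need 64 DS entries, which overflows the 6-bit DS field. */
static_assert(MAX_WQEBBS * BB_NUM_DS > 63, "full WQE overflows the DS field");

/* 15 WQEBBs: 60 DS (fits), 960-byte WQE, a multiple of 64-byte cache lines. */
static_assert((MAX_WQEBBS - 1) * BB_NUM_DS <= 63, "15 WQEBBs fit the DS field");
static_assert(((MAX_WQEBBS - 1) * WQE_BB) % 64 == 0, "960 B is 64 B aligned");

/* 14 WQEBBs: 56 DS (fits), 896-byte WQE, a multiple of 128-byte cache lines. */
static_assert((MAX_WQEBBS - 2) * BB_NUM_DS <= 63, "14 WQEBBs fit the DS field");
static_assert(((MAX_WQEBBS - 2) * WQE_BB) % 128 == 0, "896 B is 128 B aligned");
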
@@ -437,6 +454,7 @@ struct mlx5e_txqsq {
 	struct netdev_queue       *txq;
 	u32                        sqn;
 	u16                        stop_room;
+	u16                        max_sq_mpw_wqebbs;
 	u8                         min_inline_mode;
 	struct device             *pdev;
 	__be32                     mkey_be;
@@ -551,6 +569,7 @@ struct mlx5e_xdpsq {
 	struct device             *pdev;
 	__be32                     mkey_be;
 	u16                        stop_room;
+	u16                        max_sq_mpw_wqebbs;
 	u8                         min_inline_mode;
 	unsigned long              state;
 	unsigned int               hw_mtu;
......
@@ -717,7 +717,7 @@ static u32 mlx5e_shampo_icosq_sz(struct mlx5_core_dev *mdev,
 	int wq_size = BIT(MLX5_GET(wq, wqc, log_wq_sz));
 	u32 wqebbs;

-	max_klm_per_umr = MLX5E_MAX_KLM_PER_WQE;
+	max_klm_per_umr = MLX5E_MAX_KLM_PER_WQE(mdev);
 	max_hd_per_wqe = mlx5e_shampo_hd_per_wqe(mdev, params, rq_param);
 	max_num_of_umr_per_wqe = max_hd_per_wqe / max_klm_per_umr;
 	rest = max_hd_per_wqe % max_klm_per_umr;
......
@@ -9,19 +9,6 @@
 #define MLX5E_TX_WQE_EMPTY_DS_COUNT (sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS)

-/* The mult of MLX5_SEND_WQE_MAX_WQEBBS * MLX5_SEND_WQEBB_NUM_DS
- * (16 * 4 == 64) does not fit in the 6-bit DS field of Ctrl Segment.
- * We use a bound lower that MLX5_SEND_WQE_MAX_WQEBBS to let a
- * full-session WQE be cache-aligned.
- */
-#if L1_CACHE_BYTES < 128
-#define MLX5E_TX_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 1)
-#else
-#define MLX5E_TX_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 2)
-#endif
-
-#define MLX5E_TX_MPW_MAX_NUM_DS (MLX5E_TX_MPW_MAX_WQEBBS * MLX5_SEND_WQEBB_NUM_DS)
-
 #define INL_HDR_START_SZ (sizeof(((struct mlx5_wqe_eth_seg *)NULL)->inline_hdr.start))

 #define MLX5E_RX_ERR_CQE(cqe) (get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND)
@@ -308,9 +295,9 @@ mlx5e_tx_dma_unmap(struct device *pdev, struct mlx5e_sq_dma *dma)
 void mlx5e_sq_xmit_simple(struct mlx5e_txqsq *sq, struct sk_buff *skb, bool xmit_more);
 void mlx5e_tx_mpwqe_ensure_complete(struct mlx5e_txqsq *sq);

-static inline bool mlx5e_tx_mpwqe_is_full(struct mlx5e_tx_mpwqe *session)
+static inline bool mlx5e_tx_mpwqe_is_full(struct mlx5e_tx_mpwqe *session, u8 max_sq_mpw_wqebbs)
 {
-	return session->ds_count == MLX5E_TX_MPW_MAX_NUM_DS;
+	return session->ds_count == max_sq_mpw_wqebbs * MLX5_SEND_WQEBB_NUM_DS;
 }

 static inline void mlx5e_rqwq_reset(struct mlx5e_rq *rq)
......
@@ -199,7 +199,7 @@ static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq)
 	struct mlx5e_tx_wqe *wqe;
 	u16 pi;

-	pi = mlx5e_xdpsq_get_next_pi(sq, MLX5E_TX_MPW_MAX_WQEBBS);
+	pi = mlx5e_xdpsq_get_next_pi(sq, sq->max_sq_mpw_wqebbs);
 	wqe = MLX5E_TX_FETCH_WQE(sq, pi);
 	net_prefetchw(wqe->data);
@@ -286,7 +286,7 @@ mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptx
 	mlx5e_xdp_mpwqe_add_dseg(sq, xdptxd, stats);

-	if (unlikely(mlx5e_xdp_mpqwe_is_full(session)))
+	if (unlikely(mlx5e_xdp_mpqwe_is_full(session, sq->max_sq_mpw_wqebbs)))
 		mlx5e_xdp_mpwqe_complete(sq);

 	mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi);
......
@@ -123,12 +123,13 @@ static inline bool mlx5e_xdp_get_inline_state(struct mlx5e_xdpsq *sq, bool cur)
 	return cur;
 }

-static inline bool mlx5e_xdp_mpqwe_is_full(struct mlx5e_tx_mpwqe *session)
+static inline bool mlx5e_xdp_mpqwe_is_full(struct mlx5e_tx_mpwqe *session, u8 max_sq_mpw_wqebbs)
 {
 	if (session->inline_on)
 		return session->ds_count + MLX5E_XDP_INLINE_WQE_MAX_DS_CNT >
-		       MLX5E_TX_MPW_MAX_NUM_DS;
-	return mlx5e_tx_mpwqe_is_full(session);
+		       max_sq_mpw_wqebbs * MLX5_SEND_WQEBB_NUM_DS;
+
+	return mlx5e_tx_mpwqe_is_full(session, max_sq_mpw_wqebbs);
 }

 struct mlx5e_xdp_wqe_info {
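
For the XDP path, mlx5e_xdp_mpqwe_is_full() is stricter when inlining is enabled: it reports the session as full while there is still headroom for a worst-case inlined packet (MLX5E_XDP_INLINE_WQE_MAX_DS_CNT descriptors) under the per-SQ cap. A hedged stand-in of that predicate; the names and the reserve value of 8 are assumptions for illustration, not the driver's definitions:

#include <stdbool.h>
#include <stdint.h>

#define WQEBB_NUM_DS          4   /* data segments per WQEBB */
#define XDP_INLINE_MAX_DS_CNT 8   /* assumed worst-case DS for an inlined packet */

static bool xdp_mpwqe_is_full(uint32_t ds_count, bool inline_on,
			      uint8_t max_sq_mpw_wqebbs)
{
	uint32_t max_ds = (uint32_t)max_sq_mpw_wqebbs * WQEBB_NUM_DS;

	/* With inlining, keep headroom so the next inlined frame still fits. */
	if (inline_on)
		return ds_count + XDP_INLINE_MAX_DS_CNT > max_ds;

	/* Otherwise the session is full only when every DS slot is used. */
	return ds_count == max_ds;
}
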
......
@@ -1167,6 +1167,7 @@ static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c,
 		&c->priv->channel_stats[c->ix]->rq_xdpsq;
 	sq->max_sq_wqebbs = mlx5e_get_max_sq_wqebbs(mdev);
 	sq->stop_room = MLX5E_STOP_ROOM(sq->max_sq_wqebbs);
+	sq->max_sq_mpw_wqebbs = mlx5e_get_sw_max_sq_mpw_wqebbs(sq->max_sq_wqebbs);

 	param->wq.db_numa_node = cpu_to_node(c->cpu);
 	err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, wq, &sq->wq_ctrl);
@@ -1328,6 +1329,7 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
 	sq->min_inline_mode = params->tx_min_inline_mode;
 	sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
 	sq->max_sq_wqebbs = mlx5e_get_max_sq_wqebbs(mdev);
+	sq->max_sq_mpw_wqebbs = mlx5e_get_sw_max_sq_mpw_wqebbs(sq->max_sq_wqebbs);
 	INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work);
 	if (!MLX5_CAP_ETH(mdev, wqe_vlan_insert))
 		set_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state);
......
@@ -620,7 +620,7 @@ static int mlx5e_alloc_rx_hd_mpwqe(struct mlx5e_rq *rq)
 	struct mlx5e_icosq *sq = rq->icosq;
 	int i, err, max_klm_entries, len;

-	max_klm_entries = MLX5E_MAX_KLM_PER_WQE;
+	max_klm_entries = MLX5E_MAX_KLM_PER_WQE(rq->mdev);

 	klm_entries = bitmap_find_window(shampo->bitmap,
 					 shampo->hd_per_wqe,
 					 shampo->hd_per_wq, shampo->pi);
......
@@ -544,7 +544,7 @@ static void mlx5e_tx_mpwqe_session_start(struct mlx5e_txqsq *sq,
 	struct mlx5e_tx_wqe *wqe;
 	u16 pi;

-	pi = mlx5e_txqsq_get_next_pi(sq, MLX5E_TX_MPW_MAX_WQEBBS);
+	pi = mlx5e_txqsq_get_next_pi(sq, sq->max_sq_mpw_wqebbs);
 	wqe = MLX5E_TX_FETCH_WQE(sq, pi);
 	net_prefetchw(wqe->data);
@@ -645,7 +645,7 @@ mlx5e_sq_xmit_mpwqe(struct mlx5e_txqsq *sq, struct sk_buff *skb,
 	mlx5e_tx_skb_update_hwts_flags(skb);

-	if (unlikely(mlx5e_tx_mpwqe_is_full(&sq->mpwqe))) {
+	if (unlikely(mlx5e_tx_mpwqe_is_full(&sq->mpwqe, sq->max_sq_mpw_wqebbs))) {
 		/* Might stop the queue and affect the retval of __netdev_tx_sent_queue. */
 		cseg = mlx5e_tx_mpwqe_session_complete(sq);
......