Commit 5ffb4d85 authored by Maxim Mikityanskiy, committed by Saeed Mahameed

net/mlx5e: Calculate SQ stop room in a robust way

Currently, different formulas are used to estimate the space that may be
taken by WQEs in the SQ during a single packet transmit. This space is
called stop room, and it is checked at the end of a packet transmit to
find out whether the next packet could overflow the SQ. If it could, the
driver tells the kernel to stop sending further packets.
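
For illustration, a minimal user-space model of that check (hypothetical
names and simplified counter arithmetic; the driver's
mlx5e_wqc_has_room_for is more involved):

    #include <stdbool.h>
    #include <stdint.h>

    /* cc/pc are the consumer/producer counters in WQEBB units; the
     * modulo-2^16 subtraction handles counter wrap-around. */
    static bool sq_has_room_for(uint16_t cc, uint16_t pc,
                                uint16_t sq_size, uint16_t stop_room)
    {
        uint16_t in_use = (uint16_t)(pc - cc);

        return (uint16_t)(sq_size - in_use) >= stop_room;
    }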

Many factors affect the stop room:

1. Padding with NOPs to avoid WQEs spanning over page boundaries.

2. Enabled and disabled offloads (TLS, upcoming MPWQE).

3. The maximum size of a WQE.

The padding is performed before a WQE whenever that WQE does not fit
into the remainder of the current page; the leftover space is filled
with NOPs, as the sketch below illustrates.
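
As a sketch (a hypothetical helper, not driver code), the number of NOP
WQEBBs inserted before a WQE of wqe_size WQEBBs can be modeled as:

    #include <stdint.h>

    /* bbs_per_page = PAGE_SIZE / MLX5_SEND_WQE_BB (e.g. 4096 / 64 = 64),
     * assumed to be a power of two; pc is the producer position in
     * WQEBBs. If the WQE does not fit into the rest of the page, the
     * whole remainder is filled with NOPs. */
    static uint16_t nop_padding(uint16_t pc, uint16_t wqe_size,
                                uint16_t bbs_per_page)
    {
        uint16_t room_in_page = bbs_per_page - (pc & (bbs_per_page - 1));

        return room_in_page < wqe_size ? room_in_page : 0;
    }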

The current formula assumes that only one padding will be required per
packet, and it doesn't take into account that the WQEs posted during the
transmission of a single packet might exceed the page size in very rare
circumstances. For example, to hit this condition with 4096-byte pages,
TLS offload would have to interrupt an almost-full MPWQE session, be in
the resync flow and try to transmit close to the maximum amount of data.

To avoid SQ overflows in such rare cases after MPWQE is added, this
patch introduces a more robust formula to estimate the stop room. The
new formula uses the fact that a WQE of size X will not require more
than X-1 WQEBBs of padding. More exact estimates are possible, but they
would result in much more complex and error-prone code for little gain.
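
A worked worst case, assuming the usual 64-byte WQEBB and a 16-WQEBB
(1024-byte) maximum WQE: posting a maximal WQE when only 15 WQEBBs
remain in the page costs 15 NOPs plus the 16-WQEBB WQE itself, so the
formula reserves 2 * 16 - 1 = 31 WQEBBs. A minimal check of the
arithmetic:

    #include <assert.h>
    #include <stdint.h>

    /* Mirrors the arithmetic of the new mlx5e_stop_room_for_wqe():
     * a WQE of X WQEBBs plus up to X-1 WQEBBs of NOP padding. */
    static uint16_t stop_room_for_wqe(uint16_t wqe_size)
    {
        return wqe_size * 2 - 1;
    }

    int main(void)
    {
        assert(stop_room_for_wqe(16) == 31); /* maximum-size WQE */
        assert(stop_room_for_wqe(1) == 1);   /* single WQEBB: no padding */
        return 0;
    }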

Before this patch, the TLS stop room included space for both INNOVA and
ConnectX TLS offloads that couldn't run at the same time anyway, so this
patch accounts only for the active one.
Signed-off-by: Maxim Mikityanskiy <maximmi@mellanox.com>
Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
parent 8b46d424
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
@@ -6,25 +6,6 @@
 #include "en.h"
 
-#define MLX5E_SQ_NOPS_ROOM (MLX5_SEND_WQE_MAX_WQEBBS - 1)
-#define MLX5E_SQ_STOP_ROOM (MLX5_SEND_WQE_MAX_WQEBBS +\
-			    MLX5E_SQ_NOPS_ROOM)
-
-#ifndef CONFIG_MLX5_EN_TLS
-#define MLX5E_SQ_TLS_ROOM (0)
-#else
-/* TLS offload requires additional stop_room for:
- * - a resync SKB.
- * kTLS offload requires fixed additional stop_room for:
- * - a static params WQE, and a progress params WQE.
- * The additional MTU-depending room for the resync DUMP WQEs
- * will be calculated and added in runtime.
- */
-#define MLX5E_SQ_TLS_ROOM \
-	(MLX5_SEND_WQE_MAX_WQEBBS + \
-	 MLX5E_KTLS_STATIC_WQEBBS + MLX5E_KTLS_PROGRESS_WQEBBS)
-#endif
-
 #define INL_HDR_START_SZ (sizeof(((struct mlx5_wqe_eth_seg *)NULL)->inline_hdr.start))
 
 enum mlx5e_icosq_wqe_type {
@@ -331,4 +312,25 @@ mlx5e_set_eseg_swp(struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg,
 	}
 }
 
+static inline u16 mlx5e_stop_room_for_wqe(u16 wqe_size)
+{
+	BUILD_BUG_ON(PAGE_SIZE / MLX5_SEND_WQE_BB < MLX5_SEND_WQE_MAX_WQEBBS);
+
+	/* A WQE must not cross the page boundary, hence two conditions:
+	 * 1. Its size must not exceed the page size.
+	 * 2. If the WQE size is X, and the space remaining in a page is less
+	 *    than X, this space needs to be padded with NOPs. So, one WQE of
+	 *    size X may require up to X-1 WQEBBs of padding, which makes the
+	 *    stop room of X-1 + X.
+	 * WQE size is also limited by the hardware limit.
+	 */
+
+	if (__builtin_constant_p(wqe_size))
+		BUILD_BUG_ON(wqe_size > MLX5_SEND_WQE_MAX_WQEBBS);
+	else
+		WARN_ON_ONCE(wqe_size > MLX5_SEND_WQE_MAX_WQEBBS);
+
+	return wqe_size * 2 - 1;
+}
+
 #endif
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
@@ -257,8 +257,10 @@ enum {
 static int mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq *sq)
 {
 	if (unlikely(!sq->mpwqe.wqe)) {
+		const u16 stop_room = mlx5e_stop_room_for_wqe(MLX5_SEND_WQE_MAX_WQEBBS);
+
 		if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc,
-						     MLX5E_XDPSQ_STOP_ROOM))) {
+						     stop_room))) {
 			/* SQ is full, ring doorbell */
 			mlx5e_xmit_xdp_doorbell(sq);
 			sq->stats->full++;
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
@@ -40,8 +40,6 @@
 	(sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS)
 #define MLX5E_XDP_TX_DS_COUNT (MLX5E_XDP_TX_EMPTY_DS_COUNT + 1 /* SG DS */)
 
-#define MLX5E_XDPSQ_STOP_ROOM (MLX5E_SQ_STOP_ROOM)
-
 #define MLX5E_XDP_INLINE_WQE_SZ_THRSD (256 - sizeof(struct mlx5_wqe_inline_seg))
 #define MLX5E_XDP_INLINE_WQE_MAX_DS_CNT \
 	DIV_ROUND_UP(MLX5E_XDP_INLINE_WQE_SZ_THRSD, MLX5_SEND_WQE_DS)
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c
@@ -4,6 +4,19 @@
 #include "en.h"
 #include "en_accel/ktls.h"
 
+u16 mlx5e_ktls_get_stop_room(struct mlx5e_txqsq *sq)
+{
+	u16 num_dumps, stop_room = 0;
+
+	num_dumps = mlx5e_ktls_dumps_num_wqes(sq, MAX_SKB_FRAGS, TLS_MAX_PAYLOAD_SIZE);
+
+	stop_room += mlx5e_stop_room_for_wqe(MLX5E_KTLS_STATIC_WQEBBS);
+	stop_room += mlx5e_stop_room_for_wqe(MLX5E_KTLS_PROGRESS_WQEBBS);
+	stop_room += num_dumps * mlx5e_stop_room_for_wqe(MLX5E_KTLS_DUMP_WQEBBS);
+
+	return stop_room;
+}
+
 static int mlx5e_ktls_create_tis(struct mlx5_core_dev *mdev, u32 *tisn)
 {
 	u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {};
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h
@@ -102,15 +102,16 @@ bool mlx5e_ktls_handle_tx_skb(struct tls_context *tls_ctx, struct mlx5e_txqsq *s
 void mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
 					   struct mlx5e_tx_wqe_info *wi,
 					   u32 *dma_fifo_cc);
+u16 mlx5e_ktls_get_stop_room(struct mlx5e_txqsq *sq);
+
 
 static inline u8
-mlx5e_ktls_dumps_num_wqebbs(struct mlx5e_txqsq *sq, unsigned int nfrags,
-			    unsigned int sync_len)
+mlx5e_ktls_dumps_num_wqes(struct mlx5e_txqsq *sq, unsigned int nfrags,
+			  unsigned int sync_len)
 {
 	/* Given the MTU and sync_len, calculates an upper bound for the
-	 * number of WQEBBs needed for the TX resync DUMP WQEs of a record.
+	 * number of DUMP WQEs needed for the TX resync of a record.
 	 */
-	return MLX5E_KTLS_DUMP_WQEBBS *
-		(nfrags + DIV_ROUND_UP(sync_len, sq->hw_mtu));
+	return nfrags + DIV_ROUND_UP(sync_len, sq->hw_mtu);
 }
 
 #else
@@ -122,7 +123,6 @@ static inline void
 mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
 				      struct mlx5e_tx_wqe_info *wi,
 				      u32 *dma_fifo_cc) {}
-
 #endif
 
 #endif /* __MLX5E_KTLS_H__ */
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c
@@ -240,3 +240,17 @@ void mlx5e_tls_cleanup(struct mlx5e_priv *priv)
 	kfree(tls);
 	priv->tls = NULL;
 }
+
+u16 mlx5e_tls_get_stop_room(struct mlx5e_txqsq *sq)
+{
+	struct mlx5_core_dev *mdev = sq->channel->mdev;
+
+	if (!mlx5_accel_is_tls_device(mdev))
+		return 0;
+
+	if (MLX5_CAP_GEN(mdev, tls_tx))
+		return mlx5e_ktls_get_stop_room(sq);
+
+	/* Resync SKB. */
+	return mlx5e_stop_room_for_wqe(MLX5_SEND_WQE_MAX_WQEBBS);
+}
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h
@@ -94,6 +94,8 @@ int mlx5e_tls_get_count(struct mlx5e_priv *priv);
 int mlx5e_tls_get_strings(struct mlx5e_priv *priv, uint8_t *data);
 int mlx5e_tls_get_stats(struct mlx5e_priv *priv, u64 *data);
 
+u16 mlx5e_tls_get_stop_room(struct mlx5e_txqsq *sq);
+
 #else
 
 static inline void mlx5e_tls_build_netdev(struct mlx5e_priv *priv)
@@ -108,6 +110,11 @@ static inline int mlx5e_tls_get_count(struct mlx5e_priv *priv) { return 0; }
 static inline int mlx5e_tls_get_strings(struct mlx5e_priv *priv, uint8_t *data) { return 0; }
 static inline int mlx5e_tls_get_stats(struct mlx5e_priv *priv, u64 *data) { return 0; }
 
+static inline u16 mlx5e_tls_get_stop_room(struct mlx5e_txqsq *sq)
+{
+	return 0;
+}
+
 #endif
 
 #endif /* __MLX5E_TLS_H__ */
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -1122,6 +1122,22 @@ static int mlx5e_alloc_txqsq_db(struct mlx5e_txqsq *sq, int numa)
 	return 0;
 }
 
+static int mlx5e_calc_sq_stop_room(struct mlx5e_txqsq *sq, u8 log_sq_size)
+{
+	int sq_size = 1 << log_sq_size;
+
+	sq->stop_room  = mlx5e_tls_get_stop_room(sq);
+	sq->stop_room += mlx5e_stop_room_for_wqe(MLX5_SEND_WQE_MAX_WQEBBS);
+
+	if (WARN_ON(sq->stop_room >= sq_size)) {
+		netdev_err(sq->channel->netdev, "Stop room %hu is bigger than the SQ size %d\n",
+			   sq->stop_room, sq_size);
+		return -ENOSPC;
+	}
+
+	return 0;
+}
+
 static void mlx5e_tx_err_cqe_work(struct work_struct *recover_work);
 
 static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
 			     int txq_ix,
@@ -1146,20 +1162,16 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
 	sq->min_inline_mode = params->tx_min_inline_mode;
 	sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
 	sq->stats = &c->priv->channel_stats[c->ix].sq[tc];
-	sq->stop_room = MLX5E_SQ_STOP_ROOM;
 	INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work);
 	if (!MLX5_CAP_ETH(mdev, wqe_vlan_insert))
 		set_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state);
 	if (MLX5_IPSEC_DEV(c->priv->mdev))
 		set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state);
-#ifdef CONFIG_MLX5_EN_TLS
-	if (mlx5_accel_is_tls_device(c->priv->mdev)) {
+	if (mlx5_accel_is_tls_device(c->priv->mdev))
 		set_bit(MLX5E_SQ_STATE_TLS, &sq->state);
-		sq->stop_room += MLX5E_SQ_TLS_ROOM +
-			mlx5e_ktls_dumps_num_wqebbs(sq, MAX_SKB_FRAGS,
-						    TLS_MAX_PAYLOAD_SIZE);
-	}
-#endif
+	err = mlx5e_calc_sq_stop_room(sq, params->log_sq_size);
+	if (err)
+		return err;
 
 	param->wq.db_numa_node = cpu_to_node(c->cpu);
 	err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, wq, &sq->wq_ctrl);