Commit 99be5617 authored by Yoray Zack's avatar Yoray Zack Committed by Jakub Kicinski

net/mlx5e: SHAMPO, Re-enable HW-GRO

Add back HW-GRO to the reported features.

As the current implementation of HW-GRO uses KSMs with a
specific fixed buffer size (256B) to map its headers buffer,
we report the feature only if the NIC supports KSM and
the minimum value for the buffer size is below the requested one.

iperf3 bandwidth comparison:
+---------+--------+--------+-----------+
| streams | SW GRO | HW GRO | Unit      |
|---------+--------+--------+-----------|
| 1       | 36     | 42     | Gbits/sec |
| 4       | 34     | 39     | Gbits/sec |
| 8       | 31     | 35     | Gbits/sec |
+---------+--------+--------+-----------+

A downstream patch will add skb fragment coalescing which will improve
performance considerably.

Benchmark details:
VM based setup
CPU: Intel(R) Xeon(R) Platinum 8380 CPU, 24 cores
NIC: ConnectX-7 100GbE
iperf3 and irq running on same CPU over a single receive queue
Signed-off-by: default avatarYoray Zack <yorayz@nvidia.com>
Signed-off-by: default avatarTariq Toukan <tariqt@nvidia.com>
Link: https://lore.kernel.org/r/20240603212219.1037656-14-tariqt@nvidia.com
Signed-off-by: default avatarJakub Kicinski <kuba@kernel.org>
parent 758191c9
...@@ -74,6 +74,27 @@ ...@@ -74,6 +74,27 @@
#include "lib/devcom.h" #include "lib/devcom.h"
#include "lib/sd.h" #include "lib/sd.h"
/* Report whether HW-GRO (SHAMPO) can be offered on this device.
 *
 * Our HW-GRO implementation relies on a fixed-buffer-size "KSM Mkey"
 * to map the SHAMPO headers buffer, so beyond the basic SHAMPO
 * capability the device must support fixed buffer sizes and its
 * minimum Mkey entity size must fit the header entry size we use.
 */
static bool mlx5e_hw_gro_supported(struct mlx5_core_dev *mdev)
{
	if (!MLX5_CAP_GEN(mdev, shampo) ||
	    !MLX5_CAP_GEN(mdev, fixed_buffer_size))
		return false;

	if (!MLX5_CAP_GEN_2(mdev, min_mkey_log_entity_size_fixed_buffer_valid))
		return false;

	return MLX5_CAP_GEN_2(mdev, min_mkey_log_entity_size_fixed_buffer) <=
	       MLX5E_SHAMPO_LOG_HEADER_ENTRY_SIZE;
}
bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev, u8 page_shift, bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev, u8 page_shift,
enum mlx5e_mpwrq_umr_mode umr_mode) enum mlx5e_mpwrq_umr_mode umr_mode)
{ {
...@@ -5331,6 +5352,11 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) ...@@ -5331,6 +5352,11 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER; netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER;
netdev->hw_features |= NETIF_F_HW_VLAN_STAG_TX; netdev->hw_features |= NETIF_F_HW_VLAN_STAG_TX;
if (mlx5e_hw_gro_supported(mdev) &&
mlx5e_check_fragmented_striding_rq_cap(mdev, PAGE_SHIFT,
MLX5E_MPWRQ_UMR_MODE_ALIGNED))
netdev->hw_features |= NETIF_F_GRO_HW;
if (mlx5e_tunnel_any_tx_proto_supported(mdev)) { if (mlx5e_tunnel_any_tx_proto_supported(mdev)) {
netdev->hw_enc_features |= NETIF_F_HW_CSUM; netdev->hw_enc_features |= NETIF_F_HW_CSUM;
netdev->hw_enc_features |= NETIF_F_TSO; netdev->hw_enc_features |= NETIF_F_TSO;
......
...@@ -1526,8 +1526,7 @@ struct mlx5_ifc_cmd_hca_cap_bits { ...@@ -1526,8 +1526,7 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 ts_cqe_to_dest_cqn[0x1]; u8 ts_cqe_to_dest_cqn[0x1];
u8 reserved_at_b3[0x6]; u8 reserved_at_b3[0x6];
u8 go_back_n[0x1]; u8 go_back_n[0x1];
u8 shampo[0x1]; u8 reserved_at_ba[0x6];
u8 reserved_at_bb[0x5];
u8 max_sgl_for_optimized_performance[0x8]; u8 max_sgl_for_optimized_performance[0x8];
u8 log_max_cq_sz[0x8]; u8 log_max_cq_sz[0x8];
...@@ -1744,7 +1743,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { ...@@ -1744,7 +1743,9 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 reserved_at_280[0x10]; u8 reserved_at_280[0x10];
u8 max_wqe_sz_sq[0x10]; u8 max_wqe_sz_sq[0x10];
u8 reserved_at_2a0[0x10]; u8 reserved_at_2a0[0xb];
u8 shampo[0x1];
u8 reserved_at_2ac[0x4];
u8 max_wqe_sz_rq[0x10]; u8 max_wqe_sz_rq[0x10];
u8 max_flow_counter_31_16[0x10]; u8 max_flow_counter_31_16[0x10];
...@@ -2017,7 +2018,8 @@ struct mlx5_ifc_cmd_hca_cap_2_bits { ...@@ -2017,7 +2018,8 @@ struct mlx5_ifc_cmd_hca_cap_2_bits {
u8 reserved_at_250[0x10]; u8 reserved_at_250[0x10];
u8 reserved_at_260[0x120]; u8 reserved_at_260[0x120];
u8 reserved_at_380[0x10]; u8 reserved_at_380[0xb];
u8 min_mkey_log_entity_size_fixed_buffer[0x5];
u8 ec_vf_vport_base[0x10]; u8 ec_vf_vport_base[0x10];
u8 reserved_at_3a0[0x10]; u8 reserved_at_3a0[0x10];
...@@ -2029,7 +2031,11 @@ struct mlx5_ifc_cmd_hca_cap_2_bits { ...@@ -2029,7 +2031,11 @@ struct mlx5_ifc_cmd_hca_cap_2_bits {
u8 pcc_ifa2[0x1]; u8 pcc_ifa2[0x1];
u8 reserved_at_3f1[0xf]; u8 reserved_at_3f1[0xf];
u8 reserved_at_400[0x400]; u8 reserved_at_400[0x1];
u8 min_mkey_log_entity_size_fixed_buffer_valid[0x1];
u8 reserved_at_402[0x1e];
u8 reserved_at_420[0x3e0];
}; };
enum mlx5_ifc_flow_destination_type { enum mlx5_ifc_flow_destination_type {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment