Commit 214baf22 authored by Maxim Mikityanskiy's avatar Maxim Mikityanskiy Committed by Jakub Kicinski

net/mlx5e: Support HTB offload

This commit adds support for HTB offload in the mlx5e driver.

Performance:

  NIC: Mellanox ConnectX-6 Dx
  CPU: Intel(R) Xeon(R) CPU E5-2680 v3 @ 2.50GHz (24 cores with HT)

  100 Gbit/s line rate, 500 UDP streams @ ~200 Mbit/s each
  48 traffic classes, flower used for steering
  No shaping (rate limits set to 4 Gbit/s per TC) - checking for max
  throughput.

  Baseline: 98.7 Gbps, 8.25 Mpps
  HTB: 6.7 Gbps, 0.56 Mpps
  HTB offload: 95.6 Gbps, 8.00 Mpps

Limitations:

1. 256 leaf nodes, 3 levels of depth.

2. Granularity for ceil is 1 Mbit/s. Rates are converted to weights, and
the bandwidth is split among the siblings according to these weights.
Other parameters for classes are not supported.

Ethtool statistics support for QoS SQs are also added. The counters are
called qos_txN_*, where N is the QoS queue number (starting from 0, the
numeration is separate from the normal SQs), and * is the counter name
(the counters are the same as for the normal SQs).
Signed-off-by: default avatarMaxim Mikityanskiy <maximmi@mellanox.com>
Reviewed-by: default avatarTariq Toukan <tariqt@nvidia.com>
Signed-off-by: default avatarJakub Kicinski <kuba@kernel.org>
parent 83271586
...@@ -16,7 +16,8 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ ...@@ -16,7 +16,8 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
transobj.o vport.o sriov.o fs_cmd.o fs_core.o pci_irq.o \ transobj.o vport.o sriov.o fs_cmd.o fs_core.o pci_irq.o \
fs_counters.o rl.o lag.o dev.o events.o wq.o lib/gid.o \ fs_counters.o rl.o lag.o dev.o events.o wq.o lib/gid.o \
lib/devcom.o lib/pci_vsc.o lib/dm.o diag/fs_tracepoint.o \ lib/devcom.o lib/pci_vsc.o lib/dm.o diag/fs_tracepoint.o \
diag/fw_tracer.o diag/crdump.o devlink.o diag/rsc_dump.o fw_reset.o diag/fw_tracer.o diag/crdump.o devlink.o diag/rsc_dump.o \
fw_reset.o qos.o
# #
# Netdev basic # Netdev basic
...@@ -25,7 +26,8 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \ ...@@ -25,7 +26,8 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \
en_tx.o en_rx.o en_dim.o en_txrx.o en/xdp.o en_stats.o \ en_tx.o en_rx.o en_dim.o en_txrx.o en/xdp.o en_stats.o \
en_selftest.o en/port.o en/monitor_stats.o en/health.o \ en_selftest.o en/port.o en/monitor_stats.o en/health.o \
en/reporter_tx.o en/reporter_rx.o en/params.o en/xsk/pool.o \ en/reporter_tx.o en/reporter_rx.o en/params.o en/xsk/pool.o \
en/xsk/setup.o en/xsk/rx.o en/xsk/tx.o en/devlink.o en/ptp.o en/xsk/setup.o en/xsk/rx.o en/xsk/tx.o en/devlink.o en/ptp.o \
en/qos.o
# #
# Netdev extra # Netdev extra
......
...@@ -55,6 +55,7 @@ ...@@ -55,6 +55,7 @@
#include "en_stats.h" #include "en_stats.h"
#include "en/dcbnl.h" #include "en/dcbnl.h"
#include "en/fs.h" #include "en/fs.h"
#include "en/qos.h"
#include "lib/hv_vhca.h" #include "lib/hv_vhca.h"
extern const struct net_device_ops mlx5e_netdev_ops; extern const struct net_device_ops mlx5e_netdev_ops;
...@@ -161,6 +162,9 @@ do { \ ...@@ -161,6 +162,9 @@ do { \
##__VA_ARGS__); \ ##__VA_ARGS__); \
} while (0) } while (0)
#define mlx5e_state_dereference(priv, p) \
rcu_dereference_protected((p), lockdep_is_held(&(priv)->state_lock))
enum mlx5e_rq_group { enum mlx5e_rq_group {
MLX5E_RQ_GROUP_REGULAR, MLX5E_RQ_GROUP_REGULAR,
MLX5E_RQ_GROUP_XSK, MLX5E_RQ_GROUP_XSK,
...@@ -663,11 +667,13 @@ struct mlx5e_channel { ...@@ -663,11 +667,13 @@ struct mlx5e_channel {
struct mlx5e_xdpsq rq_xdpsq; struct mlx5e_xdpsq rq_xdpsq;
struct mlx5e_txqsq sq[MLX5E_MAX_NUM_TC]; struct mlx5e_txqsq sq[MLX5E_MAX_NUM_TC];
struct mlx5e_icosq icosq; /* internal control operations */ struct mlx5e_icosq icosq; /* internal control operations */
struct mlx5e_txqsq __rcu * __rcu *qos_sqs;
bool xdp; bool xdp;
struct napi_struct napi; struct napi_struct napi;
struct device *pdev; struct device *pdev;
struct net_device *netdev; struct net_device *netdev;
__be32 mkey_be; __be32 mkey_be;
u16 qos_sqs_size;
u8 num_tc; u8 num_tc;
u8 lag_port; u8 lag_port;
...@@ -756,6 +762,8 @@ struct mlx5e_modify_sq_param { ...@@ -756,6 +762,8 @@ struct mlx5e_modify_sq_param {
int next_state; int next_state;
int rl_update; int rl_update;
int rl_index; int rl_index;
bool qos_update;
u16 qos_queue_group_id;
}; };
#if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE) #if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE)
...@@ -788,10 +796,20 @@ struct mlx5e_scratchpad { ...@@ -788,10 +796,20 @@ struct mlx5e_scratchpad {
cpumask_var_t cpumask; cpumask_var_t cpumask;
}; };
struct mlx5e_htb {
DECLARE_HASHTABLE(qos_tc2node, order_base_2(MLX5E_QOS_MAX_LEAF_NODES));
DECLARE_BITMAP(qos_used_qids, MLX5E_QOS_MAX_LEAF_NODES);
struct mlx5e_sq_stats **qos_sq_stats;
u16 max_qos_sqs;
u16 maj_id;
u16 defcls;
};
struct mlx5e_priv { struct mlx5e_priv {
/* priv data path fields - start */ /* priv data path fields - start */
/* +1 for port ptp ts */ /* +1 for port ptp ts */
struct mlx5e_txqsq *txq2sq[(MLX5E_MAX_NUM_CHANNELS + 1) * MLX5E_MAX_NUM_TC]; struct mlx5e_txqsq *txq2sq[(MLX5E_MAX_NUM_CHANNELS + 1) * MLX5E_MAX_NUM_TC +
MLX5E_QOS_MAX_LEAF_NODES];
int channel_tc2realtxq[MLX5E_MAX_NUM_CHANNELS][MLX5E_MAX_NUM_TC]; int channel_tc2realtxq[MLX5E_MAX_NUM_CHANNELS][MLX5E_MAX_NUM_TC];
int port_ptp_tc2realtxq[MLX5E_MAX_NUM_TC]; int port_ptp_tc2realtxq[MLX5E_MAX_NUM_TC];
#ifdef CONFIG_MLX5_CORE_EN_DCB #ifdef CONFIG_MLX5_CORE_EN_DCB
...@@ -859,6 +877,7 @@ struct mlx5e_priv { ...@@ -859,6 +877,7 @@ struct mlx5e_priv {
struct mlx5e_hv_vhca_stats_agent stats_agent; struct mlx5e_hv_vhca_stats_agent stats_agent;
#endif #endif
struct mlx5e_scratchpad scratchpad; struct mlx5e_scratchpad scratchpad;
struct mlx5e_htb htb;
}; };
struct mlx5e_rx_handlers { struct mlx5e_rx_handlers {
...@@ -986,6 +1005,7 @@ int mlx5e_safe_switch_channels(struct mlx5e_priv *priv, ...@@ -986,6 +1005,7 @@ int mlx5e_safe_switch_channels(struct mlx5e_priv *priv,
struct mlx5e_channels *new_chs, struct mlx5e_channels *new_chs,
mlx5e_fp_preactivate preactivate, mlx5e_fp_preactivate preactivate,
void *context); void *context);
int mlx5e_update_tx_netdev_queues(struct mlx5e_priv *priv);
int mlx5e_num_channels_changed(struct mlx5e_priv *priv); int mlx5e_num_channels_changed(struct mlx5e_priv *priv);
int mlx5e_num_channels_changed_ctx(struct mlx5e_priv *priv, void *context); int mlx5e_num_channels_changed_ctx(struct mlx5e_priv *priv, void *context);
void mlx5e_activate_priv_channels(struct mlx5e_priv *priv); void mlx5e_activate_priv_channels(struct mlx5e_priv *priv);
...@@ -1010,6 +1030,9 @@ void mlx5e_deactivate_icosq(struct mlx5e_icosq *icosq); ...@@ -1010,6 +1030,9 @@ void mlx5e_deactivate_icosq(struct mlx5e_icosq *icosq);
int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn, int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn,
struct mlx5e_modify_sq_param *p); struct mlx5e_modify_sq_param *p);
int mlx5e_open_txqsq(struct mlx5e_channel *c, u32 tisn, int txq_ix,
struct mlx5e_params *params, struct mlx5e_sq_param *param,
struct mlx5e_txqsq *sq, int tc, u16 qos_queue_group_id, u16 qos_qid);
void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq); void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq);
void mlx5e_deactivate_txqsq(struct mlx5e_txqsq *sq); void mlx5e_deactivate_txqsq(struct mlx5e_txqsq *sq);
void mlx5e_free_txqsq(struct mlx5e_txqsq *sq); void mlx5e_free_txqsq(struct mlx5e_txqsq *sq);
...@@ -1020,8 +1043,10 @@ struct mlx5e_create_sq_param; ...@@ -1020,8 +1043,10 @@ struct mlx5e_create_sq_param;
int mlx5e_create_sq_rdy(struct mlx5_core_dev *mdev, int mlx5e_create_sq_rdy(struct mlx5_core_dev *mdev,
struct mlx5e_sq_param *param, struct mlx5e_sq_param *param,
struct mlx5e_create_sq_param *csp, struct mlx5e_create_sq_param *csp,
u16 qos_queue_group_id,
u32 *sqn); u32 *sqn);
void mlx5e_tx_err_cqe_work(struct work_struct *recover_work); void mlx5e_tx_err_cqe_work(struct work_struct *recover_work);
void mlx5e_close_txqsq(struct mlx5e_txqsq *sq);
static inline bool mlx5_tx_swp_supported(struct mlx5_core_dev *mdev) static inline bool mlx5_tx_swp_supported(struct mlx5_core_dev *mdev)
{ {
......
...@@ -118,6 +118,8 @@ void mlx5e_build_rq_param(struct mlx5e_priv *priv, ...@@ -118,6 +118,8 @@ void mlx5e_build_rq_param(struct mlx5e_priv *priv,
struct mlx5e_rq_param *param); struct mlx5e_rq_param *param);
void mlx5e_build_sq_param_common(struct mlx5e_priv *priv, void mlx5e_build_sq_param_common(struct mlx5e_priv *priv,
struct mlx5e_sq_param *param); struct mlx5e_sq_param *param);
void mlx5e_build_sq_param(struct mlx5e_priv *priv, struct mlx5e_params *params,
struct mlx5e_sq_param *param);
void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv, void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
struct mlx5e_params *params, struct mlx5e_params *params,
struct mlx5e_xsk_param *xsk, struct mlx5e_xsk_param *xsk,
......
...@@ -261,7 +261,7 @@ static int mlx5e_ptp_open_txqsq(struct mlx5e_port_ptp *c, u32 tisn, ...@@ -261,7 +261,7 @@ static int mlx5e_ptp_open_txqsq(struct mlx5e_port_ptp *c, u32 tisn,
csp.min_inline_mode = txqsq->min_inline_mode; csp.min_inline_mode = txqsq->min_inline_mode;
csp.ts_cqe_to_dest_cqn = ptpsq->ts_cq.mcq.cqn; csp.ts_cqe_to_dest_cqn = ptpsq->ts_cq.mcq.cqn;
err = mlx5e_create_sq_rdy(c->mdev, sqp, &csp, &txqsq->sqn); err = mlx5e_create_sq_rdy(c->mdev, sqp, &csp, 0, &txqsq->sqn);
if (err) if (err)
goto err_free_txqsq; goto err_free_txqsq;
......
This diff is collapsed.
/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */
#ifndef __MLX5E_EN_QOS_H
#define __MLX5E_EN_QOS_H
#include <linux/mlx5/driver.h>
#define MLX5E_QOS_MAX_LEAF_NODES 256
struct mlx5e_priv;
struct mlx5e_channels;
struct mlx5e_channel;
int mlx5e_qos_max_leaf_nodes(struct mlx5_core_dev *mdev);
int mlx5e_qos_cur_leaf_nodes(struct mlx5e_priv *priv);
/* TX datapath API */
int mlx5e_get_txq_by_classid(struct mlx5e_priv *priv, u16 classid);
struct mlx5e_txqsq *mlx5e_get_sq(struct mlx5e_priv *priv, int qid);
/* SQ lifecycle */
int mlx5e_qos_open_queues(struct mlx5e_priv *priv, struct mlx5e_channels *chs);
void mlx5e_qos_activate_queues(struct mlx5e_priv *priv);
void mlx5e_qos_deactivate_queues(struct mlx5e_channel *c);
void mlx5e_qos_close_queues(struct mlx5e_channel *c);
/* HTB API */
int mlx5e_htb_root_add(struct mlx5e_priv *priv, u16 htb_maj_id, u16 htb_defcls,
struct netlink_ext_ack *extack);
int mlx5e_htb_root_del(struct mlx5e_priv *priv);
int mlx5e_htb_leaf_alloc_queue(struct mlx5e_priv *priv, u16 classid,
u32 parent_classid, u64 rate, u64 ceil,
struct netlink_ext_ack *extack);
int mlx5e_htb_leaf_to_inner(struct mlx5e_priv *priv, u16 classid, u16 child_classid,
u64 rate, u64 ceil, struct netlink_ext_ack *extack);
int mlx5e_htb_leaf_del(struct mlx5e_priv *priv, u16 classid, u16 *old_qid,
u16 *new_qid, struct netlink_ext_ack *extack);
int mlx5e_htb_leaf_del_last(struct mlx5e_priv *priv, u16 classid, bool force,
struct netlink_ext_ack *extack);
int mlx5e_htb_node_modify(struct mlx5e_priv *priv, u16 classid, u64 rate, u64 ceil,
struct netlink_ext_ack *extack);
#endif
...@@ -447,6 +447,17 @@ int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv, ...@@ -447,6 +447,17 @@ int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv,
goto out; goto out;
} }
/* Don't allow changing the number of channels if HTB offload is active,
* because the numeration of the QoS SQs will change, while per-queue
* qdiscs are attached.
*/
if (priv->htb.maj_id) {
err = -EINVAL;
netdev_err(priv->netdev, "%s: HTB offload is active, cannot change the number of channels\n",
__func__);
goto out;
}
new_channels.params = priv->channels.params; new_channels.params = priv->channels.params;
new_channels.params.num_channels = count; new_channels.params.num_channels = count;
...@@ -1966,6 +1977,16 @@ static int set_pflag_tx_port_ts(struct net_device *netdev, bool enable) ...@@ -1966,6 +1977,16 @@ static int set_pflag_tx_port_ts(struct net_device *netdev, bool enable)
if (!MLX5_CAP_GEN(mdev, ts_cqe_to_dest_cqn)) if (!MLX5_CAP_GEN(mdev, ts_cqe_to_dest_cqn))
return -EOPNOTSUPP; return -EOPNOTSUPP;
/* Don't allow changing the PTP state if HTB offload is active, because
* the numeration of the QoS SQs will change, while per-queue qdiscs are
* attached.
*/
if (priv->htb.maj_id) {
netdev_err(priv->netdev, "%s: HTB offload is active, cannot change the PTP state\n",
__func__);
return -EINVAL;
}
new_channels.params = priv->channels.params; new_channels.params = priv->channels.params;
MLX5E_SET_PFLAG(&new_channels.params, MLX5E_PFLAG_TX_PORT_TS, enable); MLX5E_SET_PFLAG(&new_channels.params, MLX5E_PFLAG_TX_PORT_TS, enable);
/* No need to verify SQ stop room as /* No need to verify SQ stop room as
......
...@@ -420,6 +420,25 @@ static void mlx5e_stats_grp_sw_update_stats_ptp(struct mlx5e_priv *priv, ...@@ -420,6 +420,25 @@ static void mlx5e_stats_grp_sw_update_stats_ptp(struct mlx5e_priv *priv,
} }
} }
static void mlx5e_stats_grp_sw_update_stats_qos(struct mlx5e_priv *priv,
struct mlx5e_sw_stats *s)
{
struct mlx5e_sq_stats **stats;
u16 max_qos_sqs;
int i;
/* Pairs with smp_store_release in mlx5e_open_qos_sq. */
max_qos_sqs = smp_load_acquire(&priv->htb.max_qos_sqs);
stats = READ_ONCE(priv->htb.qos_sq_stats);
for (i = 0; i < max_qos_sqs; i++) {
mlx5e_stats_grp_sw_update_stats_sq(s, READ_ONCE(stats[i]));
/* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92657 */
barrier();
}
}
static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(sw) static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(sw)
{ {
struct mlx5e_sw_stats *s = &priv->stats.sw; struct mlx5e_sw_stats *s = &priv->stats.sw;
...@@ -449,6 +468,7 @@ static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(sw) ...@@ -449,6 +468,7 @@ static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(sw)
} }
} }
mlx5e_stats_grp_sw_update_stats_ptp(priv, s); mlx5e_stats_grp_sw_update_stats_ptp(priv, s);
mlx5e_stats_grp_sw_update_stats_qos(priv, s);
} }
static const struct counter_desc q_stats_desc[] = { static const struct counter_desc q_stats_desc[] = {
...@@ -1740,6 +1760,41 @@ static const struct counter_desc ptp_cq_stats_desc[] = { ...@@ -1740,6 +1760,41 @@ static const struct counter_desc ptp_cq_stats_desc[] = {
{ MLX5E_DECLARE_PTP_CQ_STAT(struct mlx5e_ptp_cq_stats, abort_abs_diff_ns) }, { MLX5E_DECLARE_PTP_CQ_STAT(struct mlx5e_ptp_cq_stats, abort_abs_diff_ns) },
}; };
static const struct counter_desc qos_sq_stats_desc[] = {
{ MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, packets) },
{ MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, bytes) },
{ MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tso_packets) },
{ MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tso_bytes) },
{ MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tso_inner_packets) },
{ MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tso_inner_bytes) },
{ MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, csum_partial) },
{ MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, csum_partial_inner) },
{ MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, added_vlan_packets) },
{ MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, nop) },
{ MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, mpwqe_blks) },
{ MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, mpwqe_pkts) },
#ifdef CONFIG_MLX5_EN_TLS
{ MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_encrypted_packets) },
{ MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_encrypted_bytes) },
{ MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_ctx) },
{ MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_ooo) },
{ MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_dump_packets) },
{ MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_dump_bytes) },
{ MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_resync_bytes) },
{ MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_skip_no_sync_data) },
{ MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_drop_no_sync_data) },
{ MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_drop_bypass_req) },
#endif
{ MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, csum_none) },
{ MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, stopped) },
{ MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, dropped) },
{ MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, xmit_more) },
{ MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, recover) },
{ MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, cqes) },
{ MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, wake) },
{ MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, cqe_err) },
};
#define NUM_RQ_STATS ARRAY_SIZE(rq_stats_desc) #define NUM_RQ_STATS ARRAY_SIZE(rq_stats_desc)
#define NUM_SQ_STATS ARRAY_SIZE(sq_stats_desc) #define NUM_SQ_STATS ARRAY_SIZE(sq_stats_desc)
#define NUM_XDPSQ_STATS ARRAY_SIZE(xdpsq_stats_desc) #define NUM_XDPSQ_STATS ARRAY_SIZE(xdpsq_stats_desc)
...@@ -1750,6 +1805,49 @@ static const struct counter_desc ptp_cq_stats_desc[] = { ...@@ -1750,6 +1805,49 @@ static const struct counter_desc ptp_cq_stats_desc[] = {
#define NUM_PTP_SQ_STATS ARRAY_SIZE(ptp_sq_stats_desc) #define NUM_PTP_SQ_STATS ARRAY_SIZE(ptp_sq_stats_desc)
#define NUM_PTP_CH_STATS ARRAY_SIZE(ptp_ch_stats_desc) #define NUM_PTP_CH_STATS ARRAY_SIZE(ptp_ch_stats_desc)
#define NUM_PTP_CQ_STATS ARRAY_SIZE(ptp_cq_stats_desc) #define NUM_PTP_CQ_STATS ARRAY_SIZE(ptp_cq_stats_desc)
#define NUM_QOS_SQ_STATS ARRAY_SIZE(qos_sq_stats_desc)
static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(qos)
{
/* Pairs with smp_store_release in mlx5e_open_qos_sq. */
return NUM_QOS_SQ_STATS * smp_load_acquire(&priv->htb.max_qos_sqs);
}
static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(qos)
{
/* Pairs with smp_store_release in mlx5e_open_qos_sq. */
u16 max_qos_sqs = smp_load_acquire(&priv->htb.max_qos_sqs);
int i, qid;
for (qid = 0; qid < max_qos_sqs; qid++)
for (i = 0; i < NUM_QOS_SQ_STATS; i++)
sprintf(data + (idx++) * ETH_GSTRING_LEN,
qos_sq_stats_desc[i].format, qid);
return idx;
}
static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(qos)
{
struct mlx5e_sq_stats **stats;
u16 max_qos_sqs;
int i, qid;
/* Pairs with smp_store_release in mlx5e_open_qos_sq. */
max_qos_sqs = smp_load_acquire(&priv->htb.max_qos_sqs);
stats = READ_ONCE(priv->htb.qos_sq_stats);
for (qid = 0; qid < max_qos_sqs; qid++) {
struct mlx5e_sq_stats *s = READ_ONCE(stats[qid]);
for (i = 0; i < NUM_QOS_SQ_STATS; i++)
data[idx++] = MLX5E_READ_CTR64_CPU(s, qos_sq_stats_desc, i);
}
return idx;
}
static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(qos) { return; }
static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(ptp) static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(ptp)
{ {
...@@ -1932,6 +2030,7 @@ MLX5E_DEFINE_STATS_GRP(per_port_buff_congest, 0); ...@@ -1932,6 +2030,7 @@ MLX5E_DEFINE_STATS_GRP(per_port_buff_congest, 0);
MLX5E_DEFINE_STATS_GRP(eth_ext, 0); MLX5E_DEFINE_STATS_GRP(eth_ext, 0);
static MLX5E_DEFINE_STATS_GRP(tls, 0); static MLX5E_DEFINE_STATS_GRP(tls, 0);
static MLX5E_DEFINE_STATS_GRP(ptp, 0); static MLX5E_DEFINE_STATS_GRP(ptp, 0);
static MLX5E_DEFINE_STATS_GRP(qos, 0);
/* The stats groups order is opposite to the update_stats() order calls */ /* The stats groups order is opposite to the update_stats() order calls */
mlx5e_stats_grp_t mlx5e_nic_stats_grps[] = { mlx5e_stats_grp_t mlx5e_nic_stats_grps[] = {
...@@ -1955,6 +2054,7 @@ mlx5e_stats_grp_t mlx5e_nic_stats_grps[] = { ...@@ -1955,6 +2054,7 @@ mlx5e_stats_grp_t mlx5e_nic_stats_grps[] = {
&MLX5E_STATS_GRP(channels), &MLX5E_STATS_GRP(channels),
&MLX5E_STATS_GRP(per_port_buff_congest), &MLX5E_STATS_GRP(per_port_buff_congest),
&MLX5E_STATS_GRP(ptp), &MLX5E_STATS_GRP(ptp),
&MLX5E_STATS_GRP(qos),
}; };
unsigned int mlx5e_nic_stats_grps_num(struct mlx5e_priv *priv) unsigned int mlx5e_nic_stats_grps_num(struct mlx5e_priv *priv)
......
...@@ -55,6 +55,8 @@ ...@@ -55,6 +55,8 @@
#define MLX5E_DECLARE_PTP_CH_STAT(type, fld) "ptp_ch_"#fld, offsetof(type, fld) #define MLX5E_DECLARE_PTP_CH_STAT(type, fld) "ptp_ch_"#fld, offsetof(type, fld)
#define MLX5E_DECLARE_PTP_CQ_STAT(type, fld) "ptp_cq%d_"#fld, offsetof(type, fld) #define MLX5E_DECLARE_PTP_CQ_STAT(type, fld) "ptp_cq%d_"#fld, offsetof(type, fld)
#define MLX5E_DECLARE_QOS_TX_STAT(type, fld) "qos_tx%d_"#fld, offsetof(type, fld)
struct counter_desc { struct counter_desc {
char format[ETH_GSTRING_LEN]; char format[ETH_GSTRING_LEN];
size_t offset; /* Byte offset */ size_t offset; /* Byte offset */
......
...@@ -106,28 +106,53 @@ static u16 mlx5e_select_ptpsq(struct net_device *dev, struct sk_buff *skb) ...@@ -106,28 +106,53 @@ static u16 mlx5e_select_ptpsq(struct net_device *dev, struct sk_buff *skb)
return priv->port_ptp_tc2realtxq[up]; return priv->port_ptp_tc2realtxq[up];
} }
static int mlx5e_select_htb_queue(struct mlx5e_priv *priv, struct sk_buff *skb,
u16 htb_maj_id)
{
u16 classid;
if ((TC_H_MAJ(skb->priority) >> 16) == htb_maj_id)
classid = TC_H_MIN(skb->priority);
else
classid = READ_ONCE(priv->htb.defcls);
if (!classid)
return 0;
return mlx5e_get_txq_by_classid(priv, classid);
}
u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb, u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb,
struct net_device *sb_dev) struct net_device *sb_dev)
{ {
struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5e_priv *priv = netdev_priv(dev);
int num_tc_x_num_ch;
int txq_ix; int txq_ix;
int up = 0; int up = 0;
int ch_ix; int ch_ix;
if (unlikely(priv->channels.port_ptp)) { /* Sync with mlx5e_update_num_tc_x_num_ch - avoid refetching. */
int num_tc_x_num_ch; num_tc_x_num_ch = READ_ONCE(priv->num_tc_x_num_ch);
if (unlikely(dev->real_num_tx_queues > num_tc_x_num_ch)) {
/* Order maj_id before defcls - pairs with mlx5e_htb_root_add. */
u16 htb_maj_id = smp_load_acquire(&priv->htb.maj_id);
if (unlikely(htb_maj_id)) {
txq_ix = mlx5e_select_htb_queue(priv, skb, htb_maj_id);
if (txq_ix > 0)
return txq_ix;
}
if (unlikely(priv->channels.port_ptp))
if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) &&
mlx5e_use_ptpsq(skb)) mlx5e_use_ptpsq(skb))
return mlx5e_select_ptpsq(dev, skb); return mlx5e_select_ptpsq(dev, skb);
/* Sync with mlx5e_update_num_tc_x_num_ch - avoid refetching. */
num_tc_x_num_ch = READ_ONCE(priv->num_tc_x_num_ch);
txq_ix = netdev_pick_tx(dev, skb, NULL); txq_ix = netdev_pick_tx(dev, skb, NULL);
/* Fix netdev_pick_tx() not to choose ptp_channel txqs. /* Fix netdev_pick_tx() not to choose ptp_channel and HTB txqs.
* If they are selected, switch to regular queues. * If they are selected, switch to regular queues.
* Driver to select these queues only at mlx5e_select_ptpsq(). * Driver to select these queues only at mlx5e_select_ptpsq()
* and mlx5e_select_htb_queue().
*/ */
if (unlikely(txq_ix >= num_tc_x_num_ch)) if (unlikely(txq_ix >= num_tc_x_num_ch))
txq_ix %= num_tc_x_num_ch; txq_ix %= num_tc_x_num_ch;
...@@ -702,6 +727,10 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev) ...@@ -702,6 +727,10 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev)
u16 pi; u16 pi;
sq = priv->txq2sq[skb_get_queue_mapping(skb)]; sq = priv->txq2sq[skb_get_queue_mapping(skb)];
if (unlikely(!sq)) {
dev_kfree_skb_any(skb);
return NETDEV_TX_OK;
}
/* May send SKBs and WQEs. */ /* May send SKBs and WQEs. */
if (unlikely(!mlx5e_accel_tx_begin(dev, sq, skb, &accel))) if (unlikely(!mlx5e_accel_tx_begin(dev, sq, skb, &accel)))
......
...@@ -115,17 +115,21 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) ...@@ -115,17 +115,21 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget)
napi); napi);
struct mlx5e_ch_stats *ch_stats = c->stats; struct mlx5e_ch_stats *ch_stats = c->stats;
struct mlx5e_xdpsq *xsksq = &c->xsksq; struct mlx5e_xdpsq *xsksq = &c->xsksq;
struct mlx5e_txqsq __rcu **qos_sqs;
struct mlx5e_rq *xskrq = &c->xskrq; struct mlx5e_rq *xskrq = &c->xskrq;
struct mlx5e_rq *rq = &c->rq; struct mlx5e_rq *rq = &c->rq;
bool aff_change = false; bool aff_change = false;
bool busy_xsk = false; bool busy_xsk = false;
bool busy = false; bool busy = false;
int work_done = 0; int work_done = 0;
u16 qos_sqs_size;
bool xsk_open; bool xsk_open;
int i; int i;
rcu_read_lock(); rcu_read_lock();
qos_sqs = rcu_dereference(c->qos_sqs);
xsk_open = test_bit(MLX5E_CHANNEL_STATE_XSK, c->state); xsk_open = test_bit(MLX5E_CHANNEL_STATE_XSK, c->state);
ch_stats->poll++; ch_stats->poll++;
...@@ -133,6 +137,18 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) ...@@ -133,6 +137,18 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget)
for (i = 0; i < c->num_tc; i++) for (i = 0; i < c->num_tc; i++)
busy |= mlx5e_poll_tx_cq(&c->sq[i].cq, budget); busy |= mlx5e_poll_tx_cq(&c->sq[i].cq, budget);
if (unlikely(qos_sqs)) {
smp_rmb(); /* Pairs with mlx5e_qos_alloc_queues. */
qos_sqs_size = READ_ONCE(c->qos_sqs_size);
for (i = 0; i < qos_sqs_size; i++) {
struct mlx5e_txqsq *sq = rcu_dereference(qos_sqs[i]);
if (sq)
busy |= mlx5e_poll_tx_cq(&sq->cq, budget);
}
}
busy |= mlx5e_poll_xdpsq_cq(&c->xdpsq.cq); busy |= mlx5e_poll_xdpsq_cq(&c->xdpsq.cq);
if (c->xdp) if (c->xdp)
...@@ -186,6 +202,16 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) ...@@ -186,6 +202,16 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget)
mlx5e_handle_tx_dim(&c->sq[i]); mlx5e_handle_tx_dim(&c->sq[i]);
mlx5e_cq_arm(&c->sq[i].cq); mlx5e_cq_arm(&c->sq[i].cq);
} }
if (unlikely(qos_sqs)) {
for (i = 0; i < qos_sqs_size; i++) {
struct mlx5e_txqsq *sq = rcu_dereference(qos_sqs[i]);
if (sq) {
mlx5e_handle_tx_dim(sq);
mlx5e_cq_arm(&sq->cq);
}
}
}
mlx5e_handle_rx_dim(rq); mlx5e_handle_rx_dim(rq);
......
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */
#include "qos.h"
#define MLX5_QOS_DEFAULT_DWRR_UID 0
bool mlx5_qos_is_supported(struct mlx5_core_dev *mdev)
{
if (!MLX5_CAP_GEN(mdev, qos))
return false;
if (!MLX5_CAP_QOS(mdev, nic_sq_scheduling))
return false;
if (!MLX5_CAP_QOS(mdev, nic_bw_share))
return false;
if (!MLX5_CAP_QOS(mdev, nic_rate_limit))
return false;
return true;
}
int mlx5_qos_max_leaf_nodes(struct mlx5_core_dev *mdev)
{
return 1 << MLX5_CAP_QOS(mdev, log_max_qos_nic_queue_group);
}
int mlx5_qos_create_leaf_node(struct mlx5_core_dev *mdev, u32 parent_id,
u32 bw_share, u32 max_avg_bw, u32 *id)
{
u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0};
MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_id);
MLX5_SET(scheduling_context, sched_ctx, element_type,
SCHEDULING_CONTEXT_ELEMENT_TYPE_QUEUE_GROUP);
MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share);
MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_avg_bw);
return mlx5_create_scheduling_element_cmd(mdev, SCHEDULING_HIERARCHY_NIC,
sched_ctx, id);
}
int mlx5_qos_create_inner_node(struct mlx5_core_dev *mdev, u32 parent_id,
u32 bw_share, u32 max_avg_bw, u32 *id)
{
u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0};
void *attr;
MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_id);
MLX5_SET(scheduling_context, sched_ctx, element_type,
SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR);
MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share);
MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_avg_bw);
attr = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes);
MLX5_SET(tsar_element, attr, tsar_type, TSAR_ELEMENT_TSAR_TYPE_DWRR);
return mlx5_create_scheduling_element_cmd(mdev, SCHEDULING_HIERARCHY_NIC,
sched_ctx, id);
}
int mlx5_qos_create_root_node(struct mlx5_core_dev *mdev, u32 *id)
{
return mlx5_qos_create_inner_node(mdev, MLX5_QOS_DEFAULT_DWRR_UID, 0, 0, id);
}
int mlx5_qos_update_node(struct mlx5_core_dev *mdev, u32 parent_id,
u32 bw_share, u32 max_avg_bw, u32 id)
{
u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0};
u32 bitmask = 0;
MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_id);
MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share);
MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_avg_bw);
bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_BW_SHARE;
bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW;
return mlx5_modify_scheduling_element_cmd(mdev, SCHEDULING_HIERARCHY_NIC,
sched_ctx, id, bitmask);
}
int mlx5_qos_destroy_node(struct mlx5_core_dev *mdev, u32 id)
{
return mlx5_destroy_scheduling_element_cmd(mdev, SCHEDULING_HIERARCHY_NIC, id);
}
/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */
#ifndef __MLX5_QOS_H
#define __MLX5_QOS_H
#include "mlx5_core.h"
#define MLX5_DEBUG_QOS_MASK BIT(4)
#define qos_err(mdev, fmt, ...) \
mlx5_core_err(mdev, "QoS: " fmt, ##__VA_ARGS__)
#define qos_warn(mdev, fmt, ...) \
mlx5_core_warn(mdev, "QoS: " fmt, ##__VA_ARGS__)
#define qos_dbg(mdev, fmt, ...) \
mlx5_core_dbg_mask(mdev, MLX5_DEBUG_QOS_MASK, "QoS: " fmt, ##__VA_ARGS__)
bool mlx5_qos_is_supported(struct mlx5_core_dev *mdev);
int mlx5_qos_max_leaf_nodes(struct mlx5_core_dev *mdev);
int mlx5_qos_create_leaf_node(struct mlx5_core_dev *mdev, u32 parent_id,
u32 bw_share, u32 max_avg_bw, u32 *id);
int mlx5_qos_create_inner_node(struct mlx5_core_dev *mdev, u32 parent_id,
u32 bw_share, u32 max_avg_bw, u32 *id);
int mlx5_qos_create_root_node(struct mlx5_core_dev *mdev, u32 *id);
int mlx5_qos_update_node(struct mlx5_core_dev *mdev, u32 parent_id, u32 bw_share,
u32 max_avg_bw, u32 id);
int mlx5_qos_destroy_node(struct mlx5_core_dev *mdev, u32 id);
#endif
...@@ -842,11 +842,16 @@ struct mlx5_ifc_qos_cap_bits { ...@@ -842,11 +842,16 @@ struct mlx5_ifc_qos_cap_bits {
u8 reserved_at_4[0x1]; u8 reserved_at_4[0x1];
u8 packet_pacing_burst_bound[0x1]; u8 packet_pacing_burst_bound[0x1];
u8 packet_pacing_typical_size[0x1]; u8 packet_pacing_typical_size[0x1];
u8 reserved_at_7[0x4]; u8 reserved_at_7[0x1];
u8 nic_sq_scheduling[0x1];
u8 nic_bw_share[0x1];
u8 nic_rate_limit[0x1];
u8 packet_pacing_uid[0x1]; u8 packet_pacing_uid[0x1];
u8 reserved_at_c[0x14]; u8 reserved_at_c[0x14];
u8 reserved_at_20[0x20]; u8 reserved_at_20[0xb];
u8 log_max_qos_nic_queue_group[0x5];
u8 reserved_at_30[0x10];
u8 packet_pacing_max_rate[0x20]; u8 packet_pacing_max_rate[0x20];
...@@ -3347,7 +3352,7 @@ struct mlx5_ifc_sqc_bits { ...@@ -3347,7 +3352,7 @@ struct mlx5_ifc_sqc_bits {
u8 reserved_at_e0[0x10]; u8 reserved_at_e0[0x10];
u8 packet_pacing_rate_limit_index[0x10]; u8 packet_pacing_rate_limit_index[0x10];
u8 tis_lst_sz[0x10]; u8 tis_lst_sz[0x10];
u8 reserved_at_110[0x10]; u8 qos_queue_group_id[0x10];
u8 reserved_at_120[0x40]; u8 reserved_at_120[0x40];
...@@ -3362,6 +3367,7 @@ enum { ...@@ -3362,6 +3367,7 @@ enum {
SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT = 0x1, SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT = 0x1,
SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC = 0x2, SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC = 0x2,
SCHEDULING_CONTEXT_ELEMENT_TYPE_PARA_VPORT_TC = 0x3, SCHEDULING_CONTEXT_ELEMENT_TYPE_PARA_VPORT_TC = 0x3,
SCHEDULING_CONTEXT_ELEMENT_TYPE_QUEUE_GROUP = 0x4,
}; };
enum { enum {
...@@ -4805,6 +4811,7 @@ struct mlx5_ifc_query_scheduling_element_out_bits { ...@@ -4805,6 +4811,7 @@ struct mlx5_ifc_query_scheduling_element_out_bits {
enum { enum {
SCHEDULING_HIERARCHY_E_SWITCH = 0x2, SCHEDULING_HIERARCHY_E_SWITCH = 0x2,
SCHEDULING_HIERARCHY_NIC = 0x3,
}; };
struct mlx5_ifc_query_scheduling_element_in_bits { struct mlx5_ifc_query_scheduling_element_in_bits {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment