Commit f0c227c7 authored by David S. Miller

Merge tag 'mlx5-updates-2021-06-14' of git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux

Saeed Mahameed says:

====================
mlx5-updates-2021-06-14

1) Trivial lag refactoring in preparation for the upcoming single FDB lag feature
 - First 3 patches

2) Scalable IRQ distribution for sub-functions (SFs)

A subfunction (SF) is a lightweight function that has a parent PCI
function (PF) on which it is deployed.

Currently, mlx5 subfunctions share the IRQs (MSI-X) with their
parent PCI function.

Before this series the PF allocates enough IRQs to cover all the cores
in the system, and newly created SFs re-use all the IRQs that the PF
has allocated for itself. Hence, the more SFs are created, the more EQs
are stacked on each IRQ. Therefore, whenever we handle an interrupt, we
need to poll all SF EQs in addition to the PF EQs, instead of only the
PF EQs as on a system without SFs. This has a severe impact on the
performance of both the SFs and the PF.
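
To illustrate the cost (a minimal sketch, not the exact driver code): each
EQ hooks itself onto its IRQ through a notifier block, and the hard-IRQ
handler simply fires the whole chain, so every interrupt ends up polling
every EQ that shares the line:

	#include <linux/interrupt.h>
	#include <linux/notifier.h>

	/* Illustrative only: the more EQs attached to the shared IRQ's
	 * notifier chain, the more work every single interrupt triggers.
	 */
	static irqreturn_t irq_int_handler(int irq, void *nh)
	{
		atomic_notifier_call_chain(nh, 0, NULL);
		return IRQ_HANDLED;
	}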

For example, on a machine with:
Intel(R) Xeon(R) CPU E5-2697 v3 @ 2.60GHz with 56 cores.
PCI Express 3 with BW of 126 Gb/s.
ConnectX-5 Ex; EDR IB (100Gb/s) and 100GbE; dual-port QSFP28; PCIe4.0 x16.

Test case: iperf TX BW, single CPU, with the application and IRQ affinity pinned to the same CPU.
PF only:     no SFs on the system, 56 IRQs.
SF (before): 250 SFs sharing the same 56 IRQs.
SF (now):    250 SFs + 255 IRQs available to the NIC (please see the IRQ spread scheme below).

	    application SF-IRQ  channel   BW(Gb/sec)         interrupts/sec
            iperf TX            affinity
PF only     cpu={0}     cpu={0} cpu={0}   79                 8200
SF (before) cpu={0}     cpu={0} cpu={0}   51.3 (-35%)        9500
SF (now)    cpu={0}     cpu={0} cpu={0}   78 (-2%)           8200

command:
$ taskset -c 0 iperf -c 11.1.1.1 -P 3 -i 6 -t 30 | grep SUM

The difference between the SF examples is that before this series we
allocated num_cpus (56) IRQs, all of which were shared between the PF
and the SFs. After this series, we allocate 255 IRQs and spread the SFs
across them. This significantly decreases the load on each IRQ: the
number of EQs per IRQ is down by 95% (251->11).
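
Rough arithmetic behind that figure (illustrative assumptions: each SF used
to place one completion EQ on every shared IRQ, and each SF now gets
MLX5_COMP_EQS_PER_SF (8) EQs spread over the ~199 IRQs left to the SF pool):

	before: 250 SF EQs + 1 PF EQ on each of the 56 shared IRQs -> ~251 EQs per IRQ
	now:    250 SFs * 8 EQs spread over ~199 SF-pool IRQs      -> ~10-11 EQs per IRQ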

The solution proposed in this patchset is a dedicated IRQ pool for SFs
to use. The pool allocates a large number of IRQs for SFs to grab from,
in order to minimize IRQ sharing between the different SFs.
IRQs are not requested from the OS until they are first requested by an
SF consumer, and they are eventually released when the last SF consumer
releases them.
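
For reference, a minimal consumer-side sketch of the new IRQ API
(prototypes as added in mlx5_irq.h below; the surrounding variables are
illustrative only):

	struct mlx5_irq *irq;
	int err;

	/* Lazily grab an IRQ from the pool; the OS-level request happens
	 * only for the first consumer of that pool entry.
	 */
	irq = mlx5_irq_request(dev, vecidx, affinity);
	if (IS_ERR(irq))
		return PTR_ERR(irq);

	/* Hook this consumer's EQ handler onto the (possibly shared) IRQ. */
	err = mlx5_irq_attach_nb(irq, &eq->irq_nb);

	...

	/* Teardown: detach the handler and drop the reference; the IRQ is
	 * given back to the OS once the last consumer releases it.
	 */
	mlx5_irq_detach_nb(irq, &eq->irq_nb);
	mlx5_irq_release(irq);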

For the detailed IRQ spread and allocation scheme, please see the last patch:
("net/mlx5: Round-Robin EQs over IRQs")
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 08ab4d74 c36326d3
@@ -1559,12 +1559,16 @@ int mlx5r_odp_create_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq)
 	}

 	eq->irq_nb.notifier_call = mlx5_ib_eq_pf_int;
-	param = (struct mlx5_eq_param){
-		.irq_index = 0,
+	param = (struct mlx5_eq_param) {
 		.nent = MLX5_IB_NUM_PF_EQE,
 	};
 	param.mask[0] = 1ull << MLX5_EVENT_TYPE_PAGE_FAULT;
+	if (!zalloc_cpumask_var(&param.affinity, GFP_KERNEL)) {
+		err = -ENOMEM;
+		goto err_wq;
+	}
 	eq->core = mlx5_eq_create_generic(dev->mdev, &param);
+	free_cpumask_var(param.affinity);
 	if (IS_ERR(eq->core)) {
 		err = PTR_ERR(eq->core);
 		goto err_wq;
......
@@ -5114,7 +5114,7 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv)
 	mlx5e_set_netdev_mtu_boundaries(priv);
 	mlx5e_set_dev_port_mtu(priv);

-	mlx5_lag_add(mdev, netdev);
+	mlx5_lag_add_netdev(mdev, netdev);

 	mlx5e_enable_async_events(priv);
 	mlx5e_enable_blocking_events(priv);
@@ -5162,7 +5162,7 @@ static void mlx5e_nic_disable(struct mlx5e_priv *priv)
 		priv->en_trap = NULL;
 	}
 	mlx5e_disable_async_events(priv);
-	mlx5_lag_remove(mdev);
+	mlx5_lag_remove_netdev(mdev, priv->netdev);
 	mlx5_vxlan_reset_to_default(mdev->vxlan);
 }
......
@@ -976,7 +976,7 @@ static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv)
 	if (MLX5_CAP_GEN(mdev, uplink_follow))
 		mlx5_modify_vport_admin_state(mdev, MLX5_VPORT_STATE_OP_MOD_UPLINK,
 					      0, 0, MLX5_VPORT_ADMIN_STATE_AUTO);
-	mlx5_lag_add(mdev, netdev);
+	mlx5_lag_add_netdev(mdev, netdev);
 	priv->events_nb.notifier_call = uplink_rep_async_event;
 	mlx5_notifier_register(mdev, &priv->events_nb);
 	mlx5e_dcbnl_initialize(priv);
@@ -1009,7 +1009,7 @@ static void mlx5e_uplink_rep_disable(struct mlx5e_priv *priv)
 	mlx5e_dcbnl_delete_app(priv);
 	mlx5_notifier_unregister(mdev, &priv->events_nb);
 	mlx5e_rep_tc_disable(priv);
-	mlx5_lag_remove(mdev);
+	mlx5_lag_remove_netdev(mdev, priv->netdev);
 }

 static MLX5E_DEFINE_STATS_GRP(sw_rep, 0);
......
@@ -40,6 +40,7 @@ struct lag_tracker {
 struct mlx5_lag {
 	u8                        flags;
 	u8                        v2p_map[MLX5_MAX_PORTS];
+	struct kref               ref;
 	struct lag_func           pf[MLX5_MAX_PORTS];
 	struct lag_tracker        tracker;
 	struct workqueue_struct   *wq;
@@ -49,7 +50,7 @@ struct mlx5_lag {
 };

 static inline struct mlx5_lag *
-mlx5_lag_dev_get(struct mlx5_core_dev *dev)
+mlx5_lag_dev(struct mlx5_core_dev *dev)
 {
 	return dev->priv.lag;
 }
......
@@ -28,7 +28,7 @@ bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev)
 	struct mlx5_lag *ldev;
 	bool res;

-	ldev = mlx5_lag_dev_get(dev);
+	ldev = mlx5_lag_dev(dev);
 	res  = ldev && __mlx5_lag_is_multipath(ldev);

 	return res;
......
 /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
-/* Copyright (c) 2018 Mellanox Technologies */
+/* Copyright (c) 2018-2021, Mellanox Technologies inc. All rights reserved. */

 #ifndef __LIB_MLX5_EQ_H__
 #define __LIB_MLX5_EQ_H__
@@ -32,6 +32,7 @@ struct mlx5_eq {
 	unsigned int            irqn;
 	u8                      eqn;
 	struct mlx5_rsc_debug   *dbg;
+	struct mlx5_irq         *irq;
 };

 struct mlx5_eq_async {
......
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021 Mellanox Technologies Ltd */
+
+#ifndef __LIB_MLX5_SF_H__
+#define __LIB_MLX5_SF_H__
+
+#include <linux/mlx5/driver.h>
+
+static inline u16 mlx5_sf_start_function_id(const struct mlx5_core_dev *dev)
+{
+	return MLX5_CAP_GEN(dev, sf_base_id);
+}
+
+#ifdef CONFIG_MLX5_SF
+
+static inline bool mlx5_sf_supported(const struct mlx5_core_dev *dev)
+{
+	return MLX5_CAP_GEN(dev, sf);
+}
+
+static inline u16 mlx5_sf_max_functions(const struct mlx5_core_dev *dev)
+{
+	if (!mlx5_sf_supported(dev))
+		return 0;
+	if (MLX5_CAP_GEN(dev, max_num_sf))
+		return MLX5_CAP_GEN(dev, max_num_sf);
+	else
+		return 1 << MLX5_CAP_GEN(dev, log_max_sf);
+}
+
+#else
+
+static inline bool mlx5_sf_supported(const struct mlx5_core_dev *dev)
+{
+	return false;
+}
+
+static inline u16 mlx5_sf_max_functions(const struct mlx5_core_dev *dev)
+{
+	return 0;
+}
+
+#endif
+
+#endif
@@ -76,6 +76,7 @@
 #include "sf/vhca_event.h"
 #include "sf/dev/dev.h"
 #include "sf/sf.h"
+#include "mlx5_irq.h"

 MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
 MODULE_DESCRIPTION("Mellanox 5th generation network adapters (ConnectX series) core driver");
@@ -1185,6 +1186,7 @@ static int mlx5_load(struct mlx5_core_dev *dev)
 	}

 	mlx5_sf_dev_table_create(dev);
+	mlx5_lag_add_mdev(dev);

 	return 0;
@@ -1219,6 +1221,7 @@ static int mlx5_load(struct mlx5_core_dev *dev)
 static void mlx5_unload(struct mlx5_core_dev *dev)
 {
+	mlx5_lag_remove_mdev(dev);
 	mlx5_sf_dev_table_destroy(dev);
 	mlx5_sriov_detach(dev);
 	mlx5_ec_cleanup(dev);
......
@@ -164,27 +164,10 @@ int mlx5_query_mcam_reg(struct mlx5_core_dev *dev, u32 *mcap, u8 feature_group,
 int mlx5_query_qcam_reg(struct mlx5_core_dev *mdev, u32 *qcam,
 			u8 feature_group, u8 access_reg_group);

-void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev);
-void mlx5_lag_remove(struct mlx5_core_dev *dev);
-
-int mlx5_irq_table_init(struct mlx5_core_dev *dev);
-void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev);
-int mlx5_irq_table_create(struct mlx5_core_dev *dev);
-void mlx5_irq_table_destroy(struct mlx5_core_dev *dev);
-int mlx5_irq_attach_nb(struct mlx5_irq_table *irq_table, int vecidx,
-		       struct notifier_block *nb);
-int mlx5_irq_detach_nb(struct mlx5_irq_table *irq_table, int vecidx,
-		       struct notifier_block *nb);
-int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int devfn,
-			    int msix_vec_count);
-int mlx5_get_default_msix_vec_count(struct mlx5_core_dev *dev, int num_vfs);
-struct cpumask *
-mlx5_irq_get_affinity_mask(struct mlx5_irq_table *irq_table, int vecidx);
-struct cpu_rmap *mlx5_irq_get_rmap(struct mlx5_irq_table *table);
-int mlx5_irq_get_num_comp(struct mlx5_irq_table *table);
-struct mlx5_irq_table *mlx5_irq_table_get(struct mlx5_core_dev *dev);
+void mlx5_lag_add_netdev(struct mlx5_core_dev *dev, struct net_device *netdev);
+void mlx5_lag_remove_netdev(struct mlx5_core_dev *dev, struct net_device *netdev);
+void mlx5_lag_add_mdev(struct mlx5_core_dev *dev);
+void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev);

 int mlx5_events_init(struct mlx5_core_dev *dev);
 void mlx5_events_cleanup(struct mlx5_core_dev *dev);
......
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021 Mellanox Technologies. */
+
+#ifndef __MLX5_IRQ_H__
+#define __MLX5_IRQ_H__
+
+#include <linux/mlx5/driver.h>
+
+#define MLX5_COMP_EQS_PER_SF 8
+
+#define MLX5_IRQ_EQ_CTRL (0)
+
+struct mlx5_irq;
+
+int mlx5_irq_table_init(struct mlx5_core_dev *dev);
+void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev);
+int mlx5_irq_table_create(struct mlx5_core_dev *dev);
+void mlx5_irq_table_destroy(struct mlx5_core_dev *dev);
+int mlx5_irq_table_get_num_comp(struct mlx5_irq_table *table);
+int mlx5_irq_table_get_sfs_vec(struct mlx5_irq_table *table);
+struct mlx5_irq_table *mlx5_irq_table_get(struct mlx5_core_dev *dev);
+
+int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int devfn,
+			    int msix_vec_count);
+int mlx5_get_default_msix_vec_count(struct mlx5_core_dev *dev, int num_vfs);
+
+struct mlx5_irq *mlx5_irq_request(struct mlx5_core_dev *dev, u16 vecidx,
+				  struct cpumask *affinity);
+void mlx5_irq_release(struct mlx5_irq *irq);
+int mlx5_irq_attach_nb(struct mlx5_irq *irq, struct notifier_block *nb);
+int mlx5_irq_detach_nb(struct mlx5_irq *irq, struct notifier_block *nb);
+struct cpumask *mlx5_irq_get_affinity_mask(struct mlx5_irq *irq);
+int mlx5_irq_get_index(struct mlx5_irq *irq);
+
+#endif /* __MLX5_IRQ_H__ */
@@ -5,42 +5,7 @@
 #define __MLX5_SF_H__

 #include <linux/mlx5/driver.h>
-
-static inline u16 mlx5_sf_start_function_id(const struct mlx5_core_dev *dev)
-{
-	return MLX5_CAP_GEN(dev, sf_base_id);
-}
-
-#ifdef CONFIG_MLX5_SF
-
-static inline bool mlx5_sf_supported(const struct mlx5_core_dev *dev)
-{
-	return MLX5_CAP_GEN(dev, sf);
-}
-
-static inline u16 mlx5_sf_max_functions(const struct mlx5_core_dev *dev)
-{
-	if (!mlx5_sf_supported(dev))
-		return 0;
-	if (MLX5_CAP_GEN(dev, max_num_sf))
-		return MLX5_CAP_GEN(dev, max_num_sf);
-	else
-		return 1 << MLX5_CAP_GEN(dev, log_max_sf);
-}
-
-#else
-
-static inline bool mlx5_sf_supported(const struct mlx5_core_dev *dev)
-{
-	return false;
-}
-
-static inline u16 mlx5_sf_max_functions(const struct mlx5_core_dev *dev)
-{
-	return 0;
-}
-
-#endif
+#include "lib/sf.h"

 #ifdef CONFIG_MLX5_SF_MANAGER
......
@@ -34,6 +34,7 @@
 #include <linux/mlx5/driver.h>
 #include <linux/mlx5/vport.h>
 #include "mlx5_core.h"
+#include "mlx5_irq.h"
 #include "eswitch.h"

 static int sriov_restore_guids(struct mlx5_core_dev *dev, int vf)
......
@@ -16,6 +16,7 @@ struct mlx5_eq_param {
 	u8             irq_index;
 	int            nent;
 	u64            mask[4];
+	cpumask_var_t  affinity;
 };

 struct mlx5_eq *
......
@@ -3806,8 +3806,8 @@ struct mlx5_ifc_eqc_bits {
 	u8         reserved_at_80[0x20];

-	u8         reserved_at_a0[0x18];
-	u8         intr[0x8];
+	u8         reserved_at_a0[0x14];
+	u8         intr[0xc];

 	u8         reserved_at_c0[0x3];
 	u8         log_page_size[0x5];
......