Commit 7c34ec19 authored by Aviv Heller, committed by Saeed Mahameed

net/mlx5: Make RoCE and SR-IOV LAG modes explicit

With the introduction of SR-IOV LAG, checking whether LAG is active
is no longer sufficient, since RoCE LAG and SR-IOV LAG each entail
different behavior from both the core and InfiniBand drivers.

This patch introduces facilities to discern LAG type, in addition to
mlx5_lag_is_active(). These are implemented in such a way as to allow
more complex mode combinations in the future.
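
For illustration only (not part of this patch), a consumer of the new
queries might discern the mode along these lines; the function name and
the branch comments are placeholders:

	static void example_handle_lag(struct mlx5_core_dev *mdev)
	{
		if (!mlx5_lag_is_active(mdev))
			return;		/* no LAG configured at all */

		if (mlx5_lag_is_roce(mdev)) {
			/* RoCE LAG: a single IB device is exposed over the bond */
		} else if (mlx5_lag_is_sriov(mdev)) {
			/* SR-IOV LAG: eswitch/representor handling over the bond */
		}
	}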
Signed-off-by: Aviv Heller <avivh@mellanox.com>
Reviewed-by: Roi Dayan <roid@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
parent 292612d6
@@ -445,7 +445,7 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
 	if (!ndev)
 		goto out;
 
-	if (mlx5_lag_is_active(dev->mdev)) {
+	if (dev->lag_active) {
 		rcu_read_lock();
 		upper = netdev_master_upper_dev_get_rcu(ndev);
 		if (upper) {
@@ -1848,7 +1848,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
 	context->lib_caps = req.lib_caps;
 	print_lib_caps(dev, context->lib_caps);
 
-	if (mlx5_lag_is_active(dev->mdev)) {
+	if (dev->lag_active) {
 		u8 port = mlx5_core_native_port_num(dev->mdev);
 
 		atomic_set(&context->tx_port_affinity,
@@ -4841,7 +4841,7 @@ static int mlx5_eth_lag_init(struct mlx5_ib_dev *dev)
 	struct mlx5_flow_table *ft;
 	int err;
 
-	if (!ns || !mlx5_lag_is_active(mdev))
+	if (!ns || !mlx5_lag_is_roce(mdev))
 		return 0;
 
 	err = mlx5_cmd_create_vport_lag(mdev);
@@ -4855,6 +4855,7 @@ static int mlx5_eth_lag_init(struct mlx5_ib_dev *dev)
 	}
 
 	dev->flow_db->lag_demux_ft = ft;
+	dev->lag_active = true;
 	return 0;
 
 err_destroy_vport_lag:
@@ -4866,7 +4867,9 @@ static void mlx5_eth_lag_cleanup(struct mlx5_ib_dev *dev)
 {
 	struct mlx5_core_dev *mdev = dev->mdev;
 
-	if (dev->flow_db->lag_demux_ft) {
+	if (dev->lag_active) {
+		dev->lag_active = false;
+
 		mlx5_destroy_flow_table(dev->flow_db->lag_demux_ft);
 		dev->flow_db->lag_demux_ft = NULL;
@@ -6173,7 +6176,7 @@ int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev)
 	const char *name;
 
 	rdma_set_device_sysfs_group(&dev->ib_dev, &mlx5_attr_group);
-	if (!mlx5_lag_is_active(dev->mdev))
+	if (!mlx5_lag_is_roce(dev->mdev))
 		name = "mlx5_%d";
 	else
 		name = "mlx5_bond_%d";
......
@@ -936,6 +936,7 @@ struct mlx5_ib_dev {
 	struct mlx5_ib_delay_drop	delay_drop;
 	const struct mlx5_ib_profile	*profile;
 	struct mlx5_eswitch_rep		*rep;
+	int				lag_active;
 
 	struct mlx5_ib_lb_state	lb;
 	u8			umr_fence;
......
@@ -3258,7 +3258,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
 		    (ibqp->qp_type == IB_QPT_RAW_PACKET) ||
 		    (ibqp->qp_type == IB_QPT_XRC_INI) ||
 		    (ibqp->qp_type == IB_QPT_XRC_TGT)) {
-			if (mlx5_lag_is_active(dev->mdev)) {
+			if (dev->lag_active) {
 				u8 p = mlx5_core_native_port_num(dev->mdev);
 				tx_affinity = get_tx_affinity(dev, pd, base, p);
 				context->flags |= cpu_to_be32(tx_affinity << 24);
......
@@ -35,7 +35,7 @@ static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv,
 	dst_is_lag_dev = (uplink_upper &&
 			  netif_is_lag_master(uplink_upper) &&
 			  rt->dst.dev == uplink_upper &&
-			  mlx5_lag_is_active(priv->mdev));
+			  mlx5_lag_is_sriov(priv->mdev));
 
 	/* if the egress device isn't on the same HW e-switch or
 	 * it's a LAG device, use the uplink
@@ -94,7 +94,7 @@ static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv,
 	dst_is_lag_dev = (uplink_upper &&
 			  netif_is_lag_master(uplink_upper) &&
 			  dst->dev == uplink_upper &&
-			  mlx5_lag_is_active(priv->mdev));
+			  mlx5_lag_is_sriov(priv->mdev));
 
 	/* if the egress device isn't on the same HW e-switch or
 	 * it's a LAG device, use the uplink
......
@@ -314,7 +314,7 @@ int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr)
 	switch (attr->id) {
 	case SWITCHDEV_ATTR_ID_PORT_PARENT_ID:
 		attr->u.ppid.id_len = ETH_ALEN;
-		if (uplink_upper && mlx5_lag_is_active(uplink_priv->mdev)) {
+		if (uplink_upper && mlx5_lag_is_sriov(uplink_priv->mdev)) {
 			ether_addr_copy(attr->u.ppid.id, uplink_upper->dev_addr);
 		} else {
 			struct mlx5e_rep_priv *rpriv = priv->ppriv;
......
@@ -2718,7 +2718,7 @@ static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow)
 	bool esw_paired = mlx5_devcom_is_paired(attr->in_mdev->priv.devcom,
 						MLX5_DEVCOM_ESW_OFFLOADS);
 
-	return esw_paired && mlx5_lag_is_active(attr->in_mdev) &&
+	return esw_paired && mlx5_lag_is_sriov(attr->in_mdev) &&
 	       (is_rep_ingress || act_is_encap);
 }
......
@@ -37,9 +37,12 @@
 #include "eswitch.h"
 
 enum {
-	MLX5_LAG_FLAG_BONDED = 1 << 0,
+	MLX5_LAG_FLAG_ROCE  = 1 << 0,
+	MLX5_LAG_FLAG_SRIOV = 1 << 1,
 };
 
+#define MLX5_LAG_MODE_FLAGS (MLX5_LAG_FLAG_ROCE | MLX5_LAG_FLAG_SRIOV)
+
 struct lag_func {
 	struct mlx5_core_dev *dev;
 	struct net_device    *netdev;
@@ -161,9 +164,19 @@ static int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
 	return -1;
 }
 
+static bool __mlx5_lag_is_roce(struct mlx5_lag *ldev)
+{
+	return !!(ldev->flags & MLX5_LAG_FLAG_ROCE);
+}
+
+static bool __mlx5_lag_is_sriov(struct mlx5_lag *ldev)
+{
+	return !!(ldev->flags & MLX5_LAG_FLAG_SRIOV);
+}
+
 static bool __mlx5_lag_is_active(struct mlx5_lag *ldev)
 {
-	return !!(ldev->flags & MLX5_LAG_FLAG_BONDED);
+	return !!(ldev->flags & MLX5_LAG_MODE_FLAGS);
 }
 
 static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker,
@@ -229,9 +242,10 @@ static int mlx5_create_lag(struct mlx5_lag *ldev,
 }
 
 static void mlx5_activate_lag(struct mlx5_lag *ldev,
-			      struct lag_tracker *tracker)
+			      struct lag_tracker *tracker,
+			      u8 flags)
 {
-	ldev->flags |= MLX5_LAG_FLAG_BONDED;
+	ldev->flags |= flags;
 
 	mlx5_create_lag(ldev, tracker);
 }
@@ -240,7 +254,7 @@ static void mlx5_deactivate_lag(struct mlx5_lag *ldev)
 	struct mlx5_core_dev *dev0 = ldev->pf[0].dev;
 	int err;
 
-	ldev->flags &= ~MLX5_LAG_FLAG_BONDED;
+	ldev->flags &= ~MLX5_LAG_MODE_FLAGS;
 
 	err = mlx5_cmd_destroy_lag(dev0);
 	if (err)
@@ -263,15 +277,13 @@ static void mlx5_do_bond(struct mlx5_lag *ldev)
 {
 	struct mlx5_core_dev *dev0 = ldev->pf[0].dev;
 	struct mlx5_core_dev *dev1 = ldev->pf[1].dev;
-	bool do_bond, sriov_enabled;
 	struct lag_tracker tracker;
+	bool do_bond, roce_lag;
 	int i;
 
 	if (!dev0 || !dev1)
 		return;
 
-	sriov_enabled = mlx5_sriov_is_enabled(dev0) || mlx5_sriov_is_enabled(dev1);
-
 	mutex_lock(&lag_mutex);
 	tracker = ldev->tracker;
 	mutex_unlock(&lag_mutex);
@@ -279,28 +291,35 @@ static void mlx5_do_bond(struct mlx5_lag *ldev)
 	do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev);
 
 	if (do_bond && !__mlx5_lag_is_active(ldev)) {
-		if (!sriov_enabled)
+		roce_lag = !mlx5_sriov_is_enabled(dev0) &&
+			   !mlx5_sriov_is_enabled(dev1);
+
+		if (roce_lag)
 			for (i = 0; i < MLX5_MAX_PORTS; i++)
 				mlx5_remove_dev_by_protocol(ldev->pf[i].dev,
 							    MLX5_INTERFACE_PROTOCOL_IB);
 
-		mlx5_activate_lag(ldev, &tracker);
+		mlx5_activate_lag(ldev, &tracker,
+				  roce_lag ? MLX5_LAG_FLAG_ROCE :
+					     MLX5_LAG_FLAG_SRIOV);
 
-		if (!sriov_enabled) {
+		if (roce_lag) {
 			mlx5_add_dev_by_protocol(dev0, MLX5_INTERFACE_PROTOCOL_IB);
 			mlx5_nic_vport_enable_roce(dev1);
 		}
 	} else if (do_bond && __mlx5_lag_is_active(ldev)) {
 		mlx5_modify_lag(ldev, &tracker);
 	} else if (!do_bond && __mlx5_lag_is_active(ldev)) {
-		if (!sriov_enabled) {
+		roce_lag = __mlx5_lag_is_roce(ldev);
+
+		if (roce_lag) {
 			mlx5_remove_dev_by_protocol(dev0, MLX5_INTERFACE_PROTOCOL_IB);
 			mlx5_nic_vport_disable_roce(dev1);
 		}
 
 		mlx5_deactivate_lag(ldev);
 
-		if (!sriov_enabled)
+		if (roce_lag)
 			for (i = 0; i < MLX5_MAX_PORTS; i++)
 				if (ldev->pf[i].dev)
 					mlx5_add_dev_by_protocol(ldev->pf[i].dev,
@@ -572,6 +591,20 @@ void mlx5_lag_remove(struct mlx5_core_dev *dev)
 	}
 }
 
+bool mlx5_lag_is_roce(struct mlx5_core_dev *dev)
+{
+	struct mlx5_lag *ldev;
+	bool res;
+
+	mutex_lock(&lag_mutex);
+	ldev = mlx5_lag_dev_get(dev);
+	res = ldev && __mlx5_lag_is_roce(ldev);
+	mutex_unlock(&lag_mutex);
+
+	return res;
+}
+EXPORT_SYMBOL(mlx5_lag_is_roce);
+
 bool mlx5_lag_is_active(struct mlx5_core_dev *dev)
 {
 	struct mlx5_lag *ldev;
@@ -586,6 +619,20 @@ bool mlx5_lag_is_active(struct mlx5_core_dev *dev)
 }
 EXPORT_SYMBOL(mlx5_lag_is_active);
 
+bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev)
+{
+	struct mlx5_lag *ldev;
+	bool res;
+
+	mutex_lock(&lag_mutex);
+	ldev = mlx5_lag_dev_get(dev);
+	res = ldev && __mlx5_lag_is_sriov(ldev);
+	mutex_unlock(&lag_mutex);
+
+	return res;
+}
+EXPORT_SYMBOL(mlx5_lag_is_sriov);
+
 void mlx5_lag_update(struct mlx5_core_dev *dev)
 {
 	struct mlx5_lag *ldev;
@@ -609,7 +656,7 @@ struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev)
 	mutex_lock(&lag_mutex);
 	ldev = mlx5_lag_dev_get(dev);
 
-	if (!(ldev && __mlx5_lag_is_active(ldev)))
+	if (!(ldev && __mlx5_lag_is_roce(ldev)))
 		goto unlock;
 
 	if (ldev->tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
@@ -638,7 +685,7 @@ bool mlx5_lag_intf_add(struct mlx5_interface *intf, struct mlx5_priv *priv)
 		return true;
 
 	ldev = mlx5_lag_dev_get(dev);
-	if (!ldev || !__mlx5_lag_is_active(ldev) || ldev->pf[0].dev == dev)
+	if (!ldev || !__mlx5_lag_is_roce(ldev) || ldev->pf[0].dev == dev)
 		return true;
 
 	/* If bonded, we do not add an IB device for PF1. */
@@ -665,7 +712,7 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
 	mutex_lock(&lag_mutex);
 	ldev = mlx5_lag_dev_get(dev);
-	if (ldev && __mlx5_lag_is_active(ldev)) {
+	if (ldev && __mlx5_lag_is_roce(ldev)) {
 		num_ports = MLX5_MAX_PORTS;
 		mdev[0] = ldev->pf[0].dev;
 		mdev[1] = ldev->pf[1].dev;
......
@@ -1019,6 +1019,8 @@ int mlx5_core_query_vendor_id(struct mlx5_core_dev *mdev, u32 *vendor_id);
 
 int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev);
 int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev);
+bool mlx5_lag_is_roce(struct mlx5_core_dev *dev);
+bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev);
 bool mlx5_lag_is_active(struct mlx5_core_dev *dev);
 struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev);
 int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
......